24  Introducción rápida a Pandas

import pandas as pd
import matplotlib.pyplot as plt
%%timeit
# Benchmark: time reading the CSV file with pandas, using column 0 as the
# index and asking pandas to parse it as dates.
f = '../../data/Temixco_2018_10Min.csv'
pd.read_csv(f,index_col=0,parse_dates=True)
58.5 ms ± 903 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
%%timeit
# Benchmark: same read as the CSV timing, but from the .xlsx version of the
# file, so the two timings can be compared.
f = '../../data/Temixco_2018_10Min.xlsx'
pd.read_excel(f,index_col=0,parse_dates=True)
3.93 s ± 101 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# Load the CSV into `tmx`: column 0 becomes the index and is parsed as dates.
f = '../../data/Temixco_2018_10Min.csv'
tmx = pd.read_csv(
    f,
    parse_dates=True,
    index_col=0,
)
# Print a summary of the index, the columns, their non-null counts and dtypes.
tmx.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 52560 entries, 2018-01-01 00:00:00 to 2018-12-31 23:50:00
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Ib      52423 non-null  float64
 1   Ig      52423 non-null  float64
 2   To      52560 non-null  float64
 3   RH      52560 non-null  float64
 4   WS      52560 non-null  float64
 5   WD      52560 non-null  float64
 6   P       52560 non-null  float64
dtypes: float64(7)
memory usage: 3.2 MB

image.png
# Quick overview: one subplot per column via pandas' plotting wrapper.
# NOTE: this sometimes takes a very long time to render. In the recorded run
# below it was interrupted (KeyboardInterrupt) while pandas' time-series tick
# locator was computing tick positions.
tmx.plot(subplots=True);
Error in callback <function flush_figures at 0x125cfb7e0> (for post_execute), with arguments args (),kwargs {}:
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/matplotlib_inline/backend_inline.py:126, in flush_figures()
    123 if InlineBackend.instance().close_figures:
    124     # ignore the tracking, just draw and close all figures
    125     try:
--> 126         return show(True)
    127     except Exception as e:
    128         # safely show traceback if in IPython, else raise
    129         ip = get_ipython()

File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/matplotlib_inline/backend_inline.py:90, in show(close, block)
     88 try:
     89     for figure_manager in Gcf.get_all_fig_managers():
---> 90         display(
     91             figure_manager.canvas.figure,
     92             metadata=_fetch_figure_metadata(figure_manager.canvas.figure)
     93         )
     94 finally:
     95     show._to_draw = []

File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/IPython/core/display_functions.py:298, in display(include, exclude, metadata, transient, display_id, raw, clear, *objs, **kwargs)
    296     publish_display_data(data=obj, metadata=metadata, **kwargs)
    297 else:
--> 298     format_dict, md_dict = format(obj, include=include, exclude=exclude)
    299     if not format_dict:
    300         # nothing to display (e.g. _ipython_display_ took over)
    301         continue

File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/IPython/core/formatters.py:179, in DisplayFormatter.format(self, obj, include, exclude)
    177 md = None
    178 try:
--> 179     data = formatter(obj)
    180 except:
    181     # FIXME: log the exception
    182     raise

File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/decorator.py:232, in decorate.<locals>.fun(*args, **kw)
    230 if not kwsyntax:
    231     args, kw = fix(args, kw, sig)
--> 232 return caller(func, *(extras + args), **kw)

File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/IPython/core/formatters.py:223, in catch_format_error(method, self, *args, **kwargs)
    221 """show traceback on failed format call"""
    222 try:
--> 223     r = method(self, *args, **kwargs)
    224 except NotImplementedError:
    225     # don't warn on NotImplementedErrors
    226     return self._check_return(None, args[0])

File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/IPython/core/formatters.py:340, in BaseFormatter.__call__(self, obj)
    338     pass
    339 else:
--> 340     return printer(obj)
    341 # Finally look for special method names
    342 method = get_real_method(obj, self.print_method)

File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/IPython/core/pylabtools.py:152, in print_figure(fig, fmt, bbox_inches, base64, **kwargs)
    149     from matplotlib.backend_bases import FigureCanvasBase
    150     FigureCanvasBase(fig)
--> 152 fig.canvas.print_figure(bytes_io, **kw)
    153 data = bytes_io.getvalue()
    154 if fmt == 'svg':

File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/matplotlib/backend_bases.py:2178, in FigureCanvasBase.print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, bbox_inches, pad_inches, bbox_extra_artists, backend, **kwargs)
   2176 if bbox_inches:
   2177     if bbox_inches == "tight":
-> 2178         bbox_inches = self.figure.get_tightbbox(
   2179             renderer, bbox_extra_artists=bbox_extra_artists)
   2180         if (isinstance(layout_engine, ConstrainedLayoutEngine) and
   2181                 pad_inches == "layout"):
   2182             h_pad = layout_engine.get()["h_pad"]

File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/matplotlib/_api/deprecation.py:457, in make_keyword_only.<locals>.wrapper(*args, **kwargs)
    451 if len(args) > name_idx:
    452     warn_deprecated(
    453         since, message="Passing the %(name)s %(obj_type)s "
    454         "positionally is deprecated since Matplotlib %(since)s; the "
    455         "parameter will become keyword-only %(removal)s.",
    456         name=name, obj_type=f"parameter of {func.__name__}()")
--> 457 return func(*args, **kwargs)

File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/matplotlib/figure.py:1787, in FigureBase.get_tightbbox(self, renderer, bbox_extra_artists)
   1783 if ax.get_visible():
   1784     # some Axes don't take the bbox_extra_artists kwarg so we
   1785     # need this conditional....
   1786     try:
-> 1787         bbox = ax.get_tightbbox(
   1788             renderer, bbox_extra_artists=bbox_extra_artists)
   1789     except TypeError:
   1790         bbox = ax.get_tightbbox(renderer)

File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/matplotlib/_api/deprecation.py:457, in make_keyword_only.<locals>.wrapper(*args, **kwargs)
    451 if len(args) > name_idx:
    452     warn_deprecated(
    453         since, message="Passing the %(name)s %(obj_type)s "
    454         "positionally is deprecated since Matplotlib %(since)s; the "
    455         "parameter will become keyword-only %(removal)s.",
    456         name=name, obj_type=f"parameter of {func.__name__}()")
--> 457 return func(*args, **kwargs)

File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/matplotlib/axes/_base.py:4482, in _AxesBase.get_tightbbox(self, renderer, call_axes_locator, bbox_extra_artists, for_layout_only)
   4480 for axis in self._axis_map.values():
   4481     if self.axison and axis.get_visible():
-> 4482         ba = martist._get_tightbbox_for_layout_only(axis, renderer)
   4483         if ba:
   4484             bb.append(ba)

File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/matplotlib/artist.py:1408, in _get_tightbbox_for_layout_only(obj, *args, **kwargs)
   1402 """
   1403 Matplotlib's `.Axes.get_tightbbox` and `.Axis.get_tightbbox` support a
   1404 *for_layout_only* kwarg; this helper tries to use the kwarg but skips it
   1405 when encountering third-party subclasses that do not support it.
   1406 """
   1407 try:
-> 1408     return obj.get_tightbbox(*args, **{**kwargs, "for_layout_only": True})
   1409 except TypeError:
   1410     return obj.get_tightbbox(*args, **kwargs)

File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/matplotlib/axis.py:1371, in Axis.get_tightbbox(self, renderer, for_layout_only)
   1368     renderer = self.figure._get_renderer()
   1369 ticks_to_draw = self._update_ticks()
-> 1371 self._update_label_position(renderer)
   1373 # go back to just this axis's tick labels
   1374 tlb1, tlb2 = self._get_ticklabel_bboxes(ticks_to_draw, renderer)

File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/matplotlib/axis.py:2410, in XAxis._update_label_position(self, renderer)
   2408 try:
   2409     spine = self.axes.spines['bottom']
-> 2410     spinebbox = spine.get_window_extent()
   2411 except KeyError:
   2412     # use Axes if spine doesn't exist
   2413     spinebbox = self.axes.bbox

File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/matplotlib/spines.py:158, in Spine.get_window_extent(self, renderer)
    156     return bb
    157 bboxes = [bb]
--> 158 drawn_ticks = self.axis._update_ticks()
    160 major_tick = next(iter({*drawn_ticks} & {*self.axis.majorTicks}), None)
    161 minor_tick = next(iter({*drawn_ticks} & {*self.axis.minorTicks}), None)

File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/matplotlib/axis.py:1306, in Axis._update_ticks(self)
   1304     tick.label1.set_text(label)
   1305     tick.label2.set_text(label)
-> 1306 minor_locs = self.get_minorticklocs()
   1307 minor_labels = self.minor.formatter.format_ticks(minor_locs)
   1308 minor_ticks = self.get_minor_ticks(len(minor_locs))

File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/matplotlib/axis.py:1535, in Axis.get_minorticklocs(self)
   1533 """Return this Axis' minor tick locations in data coordinates."""
   1534 # Remove minor ticks duplicating major ticks.
-> 1535 minor_locs = np.asarray(self.minor.locator())
   1536 if self.remove_overlapping_locs:
   1537     major_locs = self.major.locator()

File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/pandas/plotting/_matplotlib/converter.py:1001, in TimeSeries_DateLocator.__call__(self)
    999     vmin, vmax = vmax, vmin
   1000 if self.isdynamic:
-> 1001     locs = self._get_default_locs(vmin, vmax)
   1002 else:  # pragma: no cover
   1003     base = self.base

File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/pandas/plotting/_matplotlib/converter.py:986, in TimeSeries_DateLocator._get_default_locs(self, vmin, vmax)
    984 def _get_default_locs(self, vmin, vmax):
    985     """Returns the default locations of ticks."""
--> 986     locator = self.finder(vmin, vmax, self.freq)
    988     if self.isminor:
    989         return np.compress(locator["min"], locator["val"])

File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/pandas/plotting/_matplotlib/converter.py:732, in _daily_finder(vmin, vmax, freq)
    730 elif span <= 1.15 * periodsperyear:
    731     year_start = _period_break(dates_, "year")
--> 732     month_start = _period_break(dates_, "month")
    733     week_start = _period_break(dates_, "week")
    734     info_maj[month_start] = True

File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/pandas/plotting/_matplotlib/converter.py:525, in _period_break(dates, period)
    514 def _period_break(dates: PeriodIndex, period: str) -> npt.NDArray[np.intp]:
    515     """
    516     Returns the indices where the given period changes.
    517 
   (...)
    523         Name of the period to monitor.
    524     """
--> 525     mask = _period_break_mask(dates, period)
    526     return np.nonzero(mask)[0]

File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/pandas/plotting/_matplotlib/converter.py:531, in _period_break_mask(dates, period)
    529 def _period_break_mask(dates: PeriodIndex, period: str) -> npt.NDArray[np.bool_]:
    530     current = getattr(dates, period)
--> 531     previous = getattr(dates - 1 * dates.freq, period)
    532     return current != previous

File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/pandas/core/indexes/extension.py:68, in _inherit_from_data.<locals>.fget(self)
     67 def fget(self):
---> 68     result = getattr(self._data, name)
     69     if wrap:
     70         if isinstance(result, type(self._data)):

File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/pandas/core/arrays/period.py:111, in _field_accessor.<locals>.f(self)
    109 def f(self):
    110     base = self.dtype._dtype_code
--> 111     result = get_period_field_arr(name, self.asi8, base)
    112     return result

KeyboardInterrupt: 
# Plot every column of `tmx` in its own stacked subplot by calling Matplotlib
# directly instead of going through DataFrame.plot.
columnas = tmx.columns

# Create one subplot row per column instead of a hard-coded 7, so the cell
# keeps working if the DataFrame gains or loses columns. squeeze=False makes
# plt.subplots always return a 2-D array, even for a single column.
fig, ax = plt.subplots(len(columnas), figsize=(10, 6), squeeze=False)
ax = ax[:, 0]  # single subplot column -> 1-D array of Axes, indexable as ax[i]

for eje, columna in zip(ax, columnas):
    eje.plot(tmx[columna], label=columna)
    eje.legend()
plt.show()

# Mean of every column over all rows of the DataFrame.
tmx.mean()
Ib      236.742726
Ig      257.414344
To       22.838098
RH       45.152827
WS        1.905200
WD      210.734453
P     87591.151598
dtype: float64
# Mean of only the Ig and Ib columns, selected with a list of column labels.
tmx[['Ig','Ib']].mean()
Ig    257.414344
Ib    236.742726
dtype: float64
# Group the rows by calendar month ('ME' = month-end frequency; each group is
# labelled with the last day of its month) and take the mean of every column.
tmx.resample('ME').mean()
Ib Ig To RH WS WD P
time
2018-01-31 229.713302 207.559305 19.598040 33.566660 1.976923 227.108655 87759.582867
2018-02-28 265.335796 245.766019 23.018237 30.830774 1.925072 207.425322 87676.561910
2018-03-31 291.085348 283.570775 25.491299 25.675431 1.969527 197.244648 87425.268981
2018-04-30 236.256088 289.300213 25.529778 31.692567 2.117917 188.417818 87496.062234
2018-05-31 200.070784 270.222629 26.747883 37.973319 2.049228 192.587381 87359.343943
2018-06-30 180.421613 269.410717 22.982317 57.115613 1.787019 194.736577 87552.942243
2018-07-31 276.271034 310.902144 23.816322 44.213600 1.945517 210.220904 87740.813692
2018-08-31 230.954512 286.472724 22.119326 60.574767 1.853452 200.686337 87707.593947
2018-09-30 230.991977 275.230343 22.291153 61.207928 1.853109 213.798792 87532.336271
2018-10-31 221.022707 243.401029 22.149418 60.249711 1.762850 219.965041 87523.024857
2018-11-30 221.582542 200.433847 20.636956 55.397896 1.731674 238.815468 87649.620528
2018-12-31 256.043035 206.063444 19.696709 42.749745 1.887806 237.254994 87674.622088
# Boolean-mask selection: keep only the rows whose Ig value is at least 300.
tmx.loc[tmx.Ig >= 300]
Ib Ig To RH WS WD P
time
2018-01-01 09:10:00 533.6 315.4 19.58 34.76 0.842 167.1 88161.86
2018-01-01 09:20:00 556.8 349.5 20.31 33.36 1.198 172.0 88156.05
2018-01-01 09:30:00 574.3 379.8 20.21 32.46 0.947 203.4 88165.92
2018-01-01 09:40:00 589.4 410.6 20.38 31.60 1.457 183.7 88160.85
2018-01-01 09:50:00 611.2 445.6 20.46 31.72 2.448 209.2 88169.38
... ... ... ... ... ... ... ...
2018-12-31 16:00:00 302.3 366.4 25.28 27.20 3.585 211.1 87320.61
2018-12-31 16:10:00 686.4 549.3 25.47 25.88 2.023 181.5 87339.67
2018-12-31 16:20:00 447.8 389.1 25.66 25.96 1.593 172.4 87322.26
2018-12-31 16:30:00 722.8 417.4 25.72 25.40 1.983 184.5 87326.93
2018-12-31 16:40:00 667.0 330.7 26.31 24.89 1.323 260.6 87328.50

17808 rows × 7 columns

# Nested dictionary: each outer key is a participant, each inner dictionary
# holds that participant's fields.
participantes = {
    'Diego': dict(Edad=30, Ciudad='Campeche'),
    'Nazli': dict(Edad=22, Ciudad='CdMx'),
    'Kevin': dict(Edad=40, Ciudad='Cuernavaca'),
    'Gaby': dict(Edad=37, Ciudad='Cuautla'),
}

# Build the DataFrame with orient='index' so the outer keys become the row
# labels and the inner keys become the columns.
participantes_df = pd.DataFrame.from_dict(
    participantes,
    orient='index',
)
participantes_df
Edad Ciudad
Diego 30 Campeche
Nazli 22 CdMx
Kevin 40 Cuernavaca
Gaby 37 Cuautla
# Dictionary whose single entry holds a list of (name, age) tuples.
nombres = {
    'Equipa': [('Diego', 30), ('Nazli', 22), ('Kevin', 40)],
}

# Each tuple becomes one row; the column labels are supplied explicitly.
nombres_df = pd.DataFrame(
    data=nombres['Equipa'],
    columns=['Nombre', 'Edad'],
)
nombres_df
Nombre Edad
0 Diego 30
1 Nazli 22
2 Kevin 40

# Dictionary of lists (no dates): each key is a column name and each list
# holds that column's values.
nombres = {
    'Nombre': ['Diego', 'Nazli', 'Kevin'],
    'Edad': [30, 22, 40],
}

# Build the DataFrame from the dictionary defined above. The original passed
# `nombres_df` (the DataFrame left over from the previous cell), so this
# dictionary was never used and the cell failed when run on its own.
nombres_df = pd.DataFrame(nombres)
nombres_df
Nombre Edad
0 Diego 30
1 Nazli 22
2 Kevin 40

Algunas cosas que se pueden hacer con pandas

  1. Lectura y escritura de datos
    • csv
    • excel
    • clipboard
    • parquet
    • sql
    • hdf5
    • html
  2. Limpieza de datos
  3. Transformación de datos
  4. Visualización rápida de datos
  5. Estadística de datos
  6. Inspección rápida de estructura y tipos de datos
  7. Selección y filtrado de datos
  8. Combinación de datos
  9. Agrupación de datos