import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Location of the parquet file, relative to the working directory.
f = "../data/tbd_9horas.parquet"

# Load it into a DataFrame; the summary printed below reports a DatetimeIndex
# and a single float64 column named `tdb` with no missing values.
tdb = pd.read_parquet(path=f)

# Print the index range, column dtypes and memory footprint.
tdb.info()
<class 'pandas.DataFrame'>
DatetimeIndex: 44983 entries, 2023-06-01 09:00:00 to 2025-06-19 09:59:00
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   tdb     44983 non-null  float64
dtypes: float64(1)
memory usage: 702.9 KB
# Histogram of the whole `tdb` column with the normal density that has the
# same mean and standard deviation drawn on top.
fig, ax = plt.subplots(figsize=(12, 4))

# Work on the column (a Series), not on the one-column DataFrame:
# DataFrame.min()/max() return a Series, which makes np.linspace produce a
# (200, 1) array instead of a flat grid of 200 points.
valores = tdb["tdb"]

mu = valores.mean()
sigma = valores.std()  # pandas default is the sample std (ddof=1)

# Evaluation grid spanning the observed range of the data.
x = np.linspace(valores.min(), valores.max(), 200)
# Normal probability density N(mu, sigma^2) evaluated on the grid.
pdf = (1 / (sigma * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((x - mu) / sigma) ** 2)

# density=True normalizes the histogram area to 1 so it is comparable to the pdf.
ax.hist(valores, bins=40, density=True, alpha=0.6)
ax.plot(x, pdf, 'r-', lw=2, label=f'μ={mu:.2f}, σ={sigma:.2f}')
ax.legend(fontsize=8)

plt.tight_layout()

# Draw a small random sample from the `tdb` column and compare it with the
# normal curve built from the full data set (`mu` and `sigma` are computed in
# the previous cell).
N = 5
# Fixed seed so the same N rows are drawn on every run.
muestra = tdb["tdb"].sample(n=N, random_state=42)

fig, ax = plt.subplots()

# Grid over the full data range. min()/max() are taken from the column so they
# are scalars; calling them on the DataFrame yields Series and a (200, 1) grid.
x = np.linspace(tdb["tdb"].min(), tdb["tdb"].max(), 200)
# Normal probability density N(mu, sigma^2) evaluated on the grid.
pdf = (1 / (sigma * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((x - mu) / sigma) ** 2)

# Sample histogram (normalized to unit area) against the full-data curve.
ax.hist(muestra, bins=40, density=True)
ax.plot(x, pdf)

plt.show()

# Sample mean/std first, then the full-data values for comparison.
print(muestra.mean(),muestra.std())
print(mu,sigma)

24.823999999999998 3.080946283205859
24.64268657048218 2.7513050667323573