28  Agrupamiento de datos con groupby

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
# Path to the CO2 dataset CSV, relative to the notebook's working directory.
f = '../../data/owid-co2-data.csv'

# Read the whole CSV into a DataFrame.
co2 = pd.read_csv(filepath_or_buffer=f)

# Bare expression: as the last statement of a notebook cell it renders a preview.
co2
country year iso_code population gdp cement_co2 cement_co2_per_capita co2 co2_growth_abs co2_growth_prct ... share_global_other_co2 share_of_temperature_change_from_ghg temperature_change_from_ch4 temperature_change_from_co2 temperature_change_from_ghg temperature_change_from_n2o total_ghg total_ghg_excluding_lucf trade_co2 trade_co2_share
0 Afghanistan 1850 AFG 3752993.0 NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 Afghanistan 1851 AFG 3767956.0 NaN NaN NaN NaN NaN NaN ... NaN 0.157 0.000 0.000 0.000 0.0 NaN NaN NaN NaN
2 Afghanistan 1852 AFG 3783940.0 NaN NaN NaN NaN NaN NaN ... NaN 0.156 0.000 0.000 0.000 0.0 NaN NaN NaN NaN
3 Afghanistan 1853 AFG 3800954.0 NaN NaN NaN NaN NaN NaN ... NaN 0.156 0.000 0.000 0.000 0.0 NaN NaN NaN NaN
4 Afghanistan 1854 AFG 3818038.0 NaN NaN NaN NaN NaN NaN ... NaN 0.155 0.000 0.000 0.000 0.0 NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
47410 Zimbabwe 2018 ZWE 15052191.0 2.678627e+10 0.558 0.037 10.715 1.419 15.265 ... NaN 0.114 0.001 0.001 0.002 0.0 116.76 29.37 -0.088 -0.825
47411 Zimbabwe 2019 ZWE 15354606.0 2.514642e+10 0.473 0.031 9.775 -0.939 -8.765 ... NaN 0.113 0.001 0.001 0.002 0.0 116.03 28.70 0.143 1.463
47412 Zimbabwe 2020 ZWE 15669663.0 2.317871e+10 0.496 0.032 7.850 -1.926 -19.700 ... NaN 0.112 0.001 0.001 0.002 0.0 113.20 25.99 0.818 10.421
47413 Zimbabwe 2021 ZWE 15993525.0 2.514009e+10 0.531 0.033 8.396 0.547 6.962 ... NaN 0.110 0.001 0.001 0.002 0.0 NaN NaN 1.088 12.956
47414 Zimbabwe 2022 ZWE 16320539.0 2.590159e+10 0.531 0.033 8.856 0.460 5.477 ... NaN 0.110 0.001 0.001 0.002 0.0 NaN NaN NaN NaN

47415 rows × 79 columns

# Line chart of the `co2` column against `year`, one coloured line per
# distinct value in `country`.
px.line(
    co2,
    x='year',
    y='co2',
    color='country',
)
# Sum of the `co2` column for each `country` value, sorted from largest to
# smallest, keeping the first 20 rows.
# NOTE: `country` also holds aggregate entries (e.g. "World", "Asia"), so
# they show up in this ranking alongside individual countries.
(
    co2.groupby('country', as_index=False)['co2']
    .sum()
    .sort_values(by='co2', ascending=False)
    .head(20)
)
country co2
257 World 1772867.527
98 High-income countries 1001743.141
176 OECD (GCP) 967949.828
169 Non-OECD (GCP) 756445.479
13 Asia 578424.628
249 Upper-middle-income countries 553307.841
74 Europe 542459.558
75 Europe (GCP) 538058.253
14 Asia (GCP) 503963.270
170 North America 490241.437
171 North America (GCP) 482514.750
248 United States 426914.556
79 European Union (28) 374804.679
78 European Union (27) 295969.981
47 China 260619.243
15 Asia (excl. China and India) 258064.693
76 Europe (excl. EU-27) 246489.581
77 Europe (excl. EU-28) 167654.884
136 Lower-middle-income countries 163674.534
195 Russia 119290.814
# Same ranking as a per-group sum of `co2`, but keyed on `iso_code`.
# groupby drops rows whose key is missing by default, so only rows that
# carry an ISO code take part in the totals.
(
    co2.groupby('iso_code', as_index=False)['co2']
    .sum()
    .sort_values(by='co2', ascending=False)
    .head(20)
)
iso_code co2
204 USA 426914.556
37 CHN 260619.243
163 RUS 119290.814
52 DEU 93985.871
70 GBR 78834.706
97 JPN 67734.911
88 IND 59740.694
66 FRA 39397.693
34 CAN 34613.228
202 UKR 30961.508
155 POL 28503.614
94 ITA 25416.444
215 ZAF 21859.984
123 MEX 21119.477
90 IRN 19852.750
104 KOR 19528.810
11 AUS 19285.056
165 SAU 17595.887
28 BRA 17176.783
87 IDN 15712.110
# Largest single `co2` value recorded for each `country` value, sorted from
# largest to smallest, keeping the first 20 rows.
(
    co2.groupby('country', as_index=False)['co2']
    .max()
    .sort_values(by='co2', ascending=False)
    .head(20)
)
country co2
257 World 37149.785
169 Non-OECD (GCP) 24332.602
13 Asia 21772.348
14 Asia (GCP) 19102.592
249 Upper-middle-income countries 17616.729
98 High-income countries 14603.550
176 OECD (GCP) 14040.573
47 China 11396.777
74 Europe 8038.431
75 Europe (GCP) 8038.430
15 Asia (excl. China and India) 7545.928
170 North America 7366.565
171 North America (GCP) 7198.346
248 United States 6132.183
136 Lower-middle-income countries 5672.114
79 European Union (28) 4757.648
76 Europe (excl. EU-27) 4168.003
78 European Union (27) 4113.135
77 Europe (excl. EU-28) 3566.058
103 India 2829.644
# Sum `co2` for every (country, year) pair. The result is a one-column
# DataFrame indexed by a (country, year) MultiIndex.
paises = co2.groupby(['country', 'year'])[['co2']].sum()

# Selecting a label on the first index level leaves a frame indexed by year.
paises.loc['United States']
co2
year
1800 0.253
1801 0.267
1802 0.289
1803 0.297
1804 0.333
... ...
2018 5377.797
2019 5262.145
2020 4714.628
2021 5032.213
2022 5057.304

223 rows × 1 columns

fig = go.Figure()

# (label in the `paises` index, legend name) for each trace, in drawing order.
for country, label in (
    ('United States', 'USA'),
    ('Mexico', 'Mex'),
    ('World', 'WLD'),
):
    # Frame for this country, indexed by year.
    yearly = paises.loc[country]
    fig.add_trace(
        go.Scatter(x=yearly.index, y=yearly['co2'], name=label)
    )

fig.show()