from causaldata import Mroz, gapminder, organ_donations, restaurant_inspections
import pyfixest as pf
%load_ext watermark
%watermark --iversions
pyfixest : 0.40.1
causaldata: 0.1.5
This notebook replicates code examples from Nick Huntington-Klein’s book on causal inference, The Effect.
pyfixest : 0.40.1
causaldata: 0.1.5
# Read in data
dt = Mroz.load_pandas().data
# Keep just working women. The explicit .copy() makes the filtered subset an
# independent DataFrame, so adding a column below does not trigger pandas'
# SettingWithCopyWarning.
dt = dt.query("lfp").copy()
# Create unlogged earnings by exponentiating the "lwg" column
dt["earn"] = dt["lwg"].apply("exp")
# 5. Run multiple linear regression models by successively adding controls
# (csw(...) adds inc, then wc, then k5, yielding three fitted models)
fit = pf.feols(fml="lwg ~ csw(inc, wc, k5)", data=dt, vcov="iid")
pf.etable(fit)
/tmp/ipykernel_7002/786816010.py:6: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
dt.loc[:, "earn"] = dt["lwg"].apply("exp")
| lwg | |||
|---|---|---|---|
| (1) | (2) | (3) | |
| coef | |||
| inc | 0.010** (0.003) |
0.005 (0.003) |
0.005 (0.003) |
| wc | 0.342*** (0.075) |
0.349*** (0.075) |
|
| k5 | -0.072 (0.087) |
||
| Intercept | 1.007*** (0.071) |
0.972*** (0.070) |
0.982*** (0.071) |
| stats | |||
| Observations | 428 | 428 | 428 |
| S.E. type | iid | iid | iid |
| R2 | 0.020 | 0.066 | 0.068 |
| Adj. R2 | 0.018 | 0.062 | 0.061 |
| Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error) | |||
| inspection_score | |
|---|---|
| (1) | |
| coef | |
| NumberofLocations | -0.019*** (0.000) |
| Intercept | 94.866*** (0.046) |
| stats | |
| Observations | 27178 |
| S.E. type | iid |
| R2 | 0.065 |
| Adj. R2 | 0.065 |
| Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error) | |
| inspection_score | ||
|---|---|---|
| (1) | (2) | |
| coef | ||
| NumberofLocations | -0.075*** (0.019) |
-0.019*** (0.000) |
| I(NumberofLocations ^ 2) | 0.056** (0.019) |
|
| Year | -0.065*** (0.006) |
-0.065*** (0.006) |
| Weekend | 1.759*** (0.488) |
|
| NumberofLocations:Weekend | -0.010 (0.008) |
|
| Intercept | 225.504*** (12.409) |
225.126*** (12.415) |
| stats | ||
| Observations | 27178 | 27178 |
| S.E. type | iid | iid |
| R2 | 0.069 | 0.069 |
| Adj. R2 | 0.069 | 0.069 |
| Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error) | ||
###
Estimation: OLS
Dep. var.: inspection_score, Fixed effects: 0
Inference: HC3
Observations: 27178
| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |
|:--------------|-----------:|-------------:|----------:|-----------:|--------:|--------:|
| Intercept | 185.380 | 12.150 | 15.257 | 0.000 | 161.564 | 209.196 |
| Year | -0.046 | 0.006 | -7.551 | 0.000 | -0.057 | -0.034 |
| Weekend | 2.057 | 0.353 | 5.829 | 0.000 | 1.365 | 2.749 |
---
RMSE: 6.248 R2: 0.003
| Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% | |
|---|---|---|---|---|---|---|
| Coefficient | ||||||
| Intercept | 185.380033 | 3.264345 | 56.789344 | 0.011209 | 143.902592 | 226.857474 |
| Year | -0.045640 | 0.001624 | -28.107556 | 0.022640 | -0.066272 | -0.025008 |
| Weekend | 2.057166 | 0.001401 | 1468.256801 | 0.000434 | 2.039364 | 2.074969 |
tba
| Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% | |
|---|---|---|---|---|---|---|
| Coefficient | ||||||
| Intercept | -27.773459 | 2.500533 | -11.107015 | 0.000000e+00 | -32.678217 | -22.868701 |
| C(country)[T.Albania] | 17.782625 | 2.195160 | 8.100835 | 1.110223e-15 | 13.476853 | 22.088397 |
| C(country)[T.Algeria] | 5.241055 | 2.214496 | 2.366704 | 1.806875e-02 | 0.897356 | 9.584755 |
| C(country)[T.Angola] | -13.907122 | 2.201727 | -6.316460 | 3.481857e-10 | -18.225777 | -9.588468 |
| C(country)[T.Argentina] | 8.132158 | 2.272781 | 3.578065 | 3.567229e-04 | 3.674133 | 12.590183 |
###
Estimation: OLS
Dep. var.: lifeExp, Fixed effects: country+year
Inference: iid
Observations: 1704
| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |
|:------------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|
| np.log(gdpPercap) | 1.450 | 0.268 | 5.419 | 0.000 | 0.925 | 1.975 |
---
RMSE: 3.267 R2: 0.936 R2 Within: 0.019
###
Estimation: OLS
Dep. var.: Rate, Fixed effects: State+Quarter
Inference: iid
Observations: 162
| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |
|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|
| Treated | -0.022 | 0.020 | -1.096 | 0.275 | -0.063 | 0.018 |
---
RMSE: 0.022 R2: 0.979 R2 Within: 0.009
# Load the organ donations data
od = organ_donations.load_pandas().data
# Treatment indicator stored as float: 1.0 for California rows, 0.0 otherwise
is_california = od["State"] == "California"
od["California"] = is_california.astype(float)
# Interact Quarter_Num with the California indicator (Quarter_Num 3 is the
# reference level) and absorb State and Quarter_Num fixed effects
did2 = pf.feols(
    fml="Rate ~ i(Quarter_Num, California,ref=3) | State + Quarter_Num",
    data=od,
)
did2.tidy()
| Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% | |
|---|---|---|---|---|---|---|
| Coefficient | ||||||
| C(Quarter_Num, contr.treatment(base=3))[1]:California | -0.002942 | 0.036055 | -0.081606 | 0.935090 | -0.074299 | 0.068415 |
| C(Quarter_Num, contr.treatment(base=3))[2]:California | 0.006296 | 0.036055 | 0.174627 | 0.861655 | -0.065061 | 0.077653 |
| C(Quarter_Num, contr.treatment(base=3))[4]:California | -0.021565 | 0.036055 | -0.598127 | 0.550837 | -0.092922 | 0.049792 |
| C(Quarter_Num, contr.treatment(base=3))[5]:California | -0.020292 | 0.036055 | -0.562817 | 0.574567 | -0.091649 | 0.051065 |
| C(Quarter_Num, contr.treatment(base=3))[6]:California | -0.022165 | 0.036055 | -0.614768 | 0.539825 | -0.093522 | 0.049192 |