from causaldata import Mroz, gapminder, organ_donations, restaurant_inspections
import pyfixest as pf
%load_ext watermark
%watermark --iversions
causaldata: 0.1.5
pyfixest : 0.28.0
This notebook replicates code examples from Nick Huntington-Klein’s book on causal inference, The Effect.
from causaldata import Mroz, gapminder, organ_donations, restaurant_inspections
import pyfixest as pf
%load_ext watermark
%watermark --iversions
causaldata: 0.1.5
pyfixest : 0.28.0
# Read in data
dt = Mroz.load_pandas().data
# Keep just working women
dt = dt.query("lfp")
# Create unlogged earnings
dt.loc[:, "earn"] = dt["lwg"].apply("exp")
# 5. Run multiple linear regression models by succesively adding controls
fit = pf.feols(fml="lwg ~ csw(inc, wc, k5)", data=dt, vcov="iid")
pf.etable(fit)
lwg | |||
---|---|---|---|
(1) | (2) | (3) | |
coef | |||
inc | 0.010** (0.003) |
0.005 (0.003) |
0.005 (0.003) |
wc | 0.342*** (0.075) |
0.349*** (0.075) |
|
k5 | -0.072 (0.087) |
||
Intercept | 1.007*** (0.071) |
0.972*** (0.070) |
0.982*** (0.071) |
stats | |||
Observations | 428 | 428 | 428 |
S.E. type | iid | iid | iid |
R2 | 0.020 | 0.066 | 0.068 |
Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error) |
res = restaurant_inspections.load_pandas().data
res.inspection_score = res.inspection_score.astype(float)
res.NumberofLocations = res.NumberofLocations.astype(float)
res.dtypes
fit = pf.feols(fml="inspection_score ~ NumberofLocations", data=res)
pf.etable([fit])
inspection_score | |
---|---|
(1) | |
coef | |
NumberofLocations | -0.019*** (0.000) |
Intercept | 94.866*** (0.046) |
stats | |
Observations | 27178 |
S.E. type | iid |
R2 | 0.065 |
Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error) |
df = restaurant_inspections.load_pandas().data
fit1 = pf.feols(
fml="inspection_score ~ NumberofLocations + I(NumberofLocations^2) + Year", data=df
)
fit2 = pf.feols(fml="inspection_score ~ NumberofLocations*Weekend + Year", data=df)
pf.etable([fit1, fit2])
inspection_score | ||
---|---|---|
(1) | (2) | |
coef | ||
NumberofLocations | -0.075*** (0.019) |
-0.019*** (0.000) |
I(NumberofLocations ^ 2) | 0.056** (0.019) |
|
Year | -0.065*** (0.006) |
-0.065*** (0.006) |
Weekend | 1.759*** (0.488) |
|
NumberofLocations:Weekend | -0.010 (0.008) |
|
Intercept | 225.504*** (12.409) |
225.126*** (12.415) |
stats | ||
Observations | 27178 | 27178 |
S.E. type | iid | iid |
R2 | 0.069 | 0.069 |
Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error) |
###
Estimation: OLS
Dep. var.: inspection_score, Fixed effects: 0
Inference: HC3
Observations: 27178
| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |
|:--------------|-----------:|-------------:|----------:|-----------:|--------:|--------:|
| Intercept | 185.380 | 12.150 | 15.257 | 0.000 | 161.564 | 209.196 |
| Year | -0.046 | 0.006 | -7.551 | 0.000 | -0.057 | -0.034 |
| Weekend | 2.057 | 0.353 | 5.829 | 0.000 | 1.365 | 2.749 |
---
RMSE: 6.248 R2: 0.003
Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% | |
---|---|---|---|---|---|---|
Coefficient | ||||||
Intercept | 185.380033 | 3.264345 | 56.789343 | 0.011209 | 143.902592 | 226.857474 |
Year | -0.045640 | 0.001624 | -28.107556 | 0.022640 | -0.066272 | -0.025008 |
Weekend | 2.057166 | 0.001401 | 1468.256799 | 0.000434 | 2.039364 | 2.074969 |
tba
gm = gapminder.load_pandas().data
gm["logGDPpercap"] = gm["gdpPercap"].apply("log")
fit = pf.feols(fml="lifeExp ~ C(country) + np.log(gdpPercap)", data=gm)
fit.tidy().head()
Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% | |
---|---|---|---|---|---|---|
Coefficient | ||||||
Intercept | -27.773459 | 2.500533 | -11.107015 | 0.000000e+00 | -32.678217 | -22.868701 |
C(country)[T.Albania] | 17.782625 | 2.195160 | 8.100835 | 1.110223e-15 | 13.476853 | 22.088397 |
C(country)[T.Algeria] | 5.241055 | 2.214496 | 2.366704 | 1.806875e-02 | 0.897356 | 9.584755 |
C(country)[T.Angola] | -13.907122 | 2.201727 | -6.316460 | 3.481857e-10 | -18.225777 | -9.588468 |
C(country)[T.Argentina] | 8.132158 | 2.272781 | 3.578065 | 3.567229e-04 | 3.674133 | 12.590183 |
# Set our individual and time (index) for our data
fit = pf.feols(fml="lifeExp ~ np.log(gdpPercap) | country + year", data=gm)
fit.summary()
###
Estimation: OLS
Dep. var.: lifeExp, Fixed effects: country+year
Inference: CRV1
Observations: 1704
| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |
|:------------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|
| np.log(gdpPercap) | 1.450 | 0.677 | 2.141 | 0.034 | 0.111 | 2.788 |
---
RMSE: 3.267 R2: 0.936 R2 Within: 0.019
od = organ_donations.load_pandas().data
# Create Treatment Variable
od["California"] = od["State"] == "California"
od["After"] = od["Quarter_Num"] > 3
od["Treated"] = 1 * (od["California"] & od["After"])
did = pf.feols(fml="Rate ~ Treated | State + Quarter", data=od)
did.summary()
###
Estimation: OLS
Dep. var.: Rate, Fixed effects: State+Quarter
Inference: CRV1
Observations: 162
| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |
|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|
| Treated | -0.022 | 0.006 | -3.733 | 0.001 | -0.035 | -0.010 |
---
RMSE: 0.022 R2: 0.979 R2 Within: 0.009
od = organ_donations.load_pandas().data
# Create Treatment Variable
od["California"] = od["State"] == "California"
# od["Quarter_Num"] = pd.Categorical(od.Quarter_Num)
od["California"] = od.California.astype(float)
did2 = pf.feols(
fml="Rate ~ i(Quarter_Num, California,ref=3) | State + Quarter_Num", data=od
)
did2.tidy()
Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% | |
---|---|---|---|---|---|---|
Coefficient | ||||||
C(Quarter_Num, contr.treatment(base=3))[1]:California | -0.002942 | 0.004986 | -0.590105 | 0.560215 | -0.013191 | 0.007307 |
C(Quarter_Num, contr.treatment(base=3))[2]:California | 0.006296 | 0.002222 | 2.833502 | 0.008782 | 0.001729 | 0.010864 |
C(Quarter_Num, contr.treatment(base=3))[4]:California | -0.021565 | 0.004937 | -4.368464 | 0.000178 | -0.031713 | -0.011418 |
C(Quarter_Num, contr.treatment(base=3))[5]:California | -0.020292 | 0.004387 | -4.625529 | 0.000090 | -0.029310 | -0.011275 |
C(Quarter_Num, contr.treatment(base=3))[6]:California | -0.022165 | 0.009820 | -2.257160 | 0.032627 | -0.042351 | -0.001980 |