* This file performs the PK WESML-LIML and WESML-LIML-FE regressions on a data set provided by Mark Pitt in 2008, which may or may not match the original * (but which matches the R&M reproduction very well). It first runs the estimator unconstrained, producing a negative sign on female credit variables, * then constrains the coefficients to match PK's published ones, which reduces the log likelihood. It also demonstrates in a simplified case that the * likelihood computed by the estimation program, -cmp-, exactly matches that given by formulas in the PK appendix. * odbc load, exec("select * from [Roodman & Morduch HH] where wave in (1,2,3)") dsn(PK) clear use "Roodman & Morduch HH.dta", clear keep if pksample & wave<4 * copy in missing last obs foreach var in fbraclv mbraclv fbrdblv mbrdblv fgramlv mgramlv { replace l`var'pk = ln(`var') if nh==2931741 & wave==1 replace l`var'pk = 0 if l`var'pk==. & nh==2931741 & wave==1 } replace llandbefpk = ln(landbef) if nh==2931741 & wave==1 replace lpcnsexppk = ln(pcnsexp) if nh==2931741 & wave==1 replace choicepk = 0 if nh==2931741 & wave==1 replace nontarpk = 0 if nh==2931741 & wave==1 foreach var in scohead afedhigh amedhigh afadultd amadultd sexhead agehead llandbef edhead weight { replace `var'pk = `var' if nh==2931741 & wave==1 } gen byte crcensored = progid<4 & proglv==0 gen fproglvpk = fbraclv + fbrdblv + fgramlv gen mproglvpk = mbraclv + mbrdblv + mgramlv gen lproglvpk = ln(fproglv + mproglv) gen lfproglvpk = ln(fproglv) gen lmproglvpk = ln(mproglv) recode *lndd* (. = 0) recode lproglvpk lfproglvpk lmproglvpk (. = 0) gen byte progd = lproglvpk < . gen tarpk = !nontarpk bysort village:egen villf=max(pksamplef) bysort village:egen villm=max(pksamplem) recode pzwflour pzmilk (. = 0) // needed to match sample sizes. These prices are actually stored as 0 in original data. scalar C_0 = ln(1) //credit censoring level recode l*lvpk (0 = `=C_0') sum [aw=weightpk] if wave==1 xi i.village i.wave global villagechars primcoed hpruralh hpfampln hpmidwif pzrice pzwflour pzmoil pzhegg pzmilk pzpotato wagef nowagef wagem bamtr global covsm scoheadpk afedhighpk amedhighpk afadultdpk sexheadpk ageheadpk llandbefpk edheadpk hdbrolnddpk hdsislnddpk hdparlnddpk spbrolnddpk spsislnddpk spparlnddpk _Iwave* global covsf scoheadpk afedhighpk amedhighpk amadultdpk sexheadpk ageheadpk llandbefpk edheadpk hdbrolnddpk hdsislnddpk hdparlnddpk spbrolnddpk spsislnddpk spparlnddpk _Iwave* global covsy scoheadpk afedhighpk amedhighpk afadultdpk amadultdpk sexheadpk ageheadpk llandbefpk edheadpk hdbrolnddpk hdsislnddpk hdparlnddpk spbrolnddpk spsislnddpk spparlnddpk _Iwave* crcensored cmp setup * cmp sample indicators for credit equations gen byte indf = cond(pksamplef, cond(lfproglvpk<=C_0+.0001, $cmp_left, $cmp_cont), $cmp_out) gen byte indm = cond(pksamplem, cond(lmproglvpk<=C_0+.0001, $cmp_left, $cmp_cont), $cmp_out) foreach prog in brac brdb gram { gen f`prog'lvpk = exp(lf`prog'lvpk) replace f`prog'lvpk = 0 if f`prog'lvpk == 1 gen m`prog'lvpk = exp(lm`prog'lvpk) replace m`prog'lvpk = 0 if m`prog'lvpk == 1 gen l`prog'lvpk = ln(f`prog'lvpk + m`prog'lvpk) } est clear reg lpcnsexppk lfbraclvpk lmbraclvpk lfbrdblvpk lmbrdblvpk lfgramlvpk lmgramlvpk $covsy $villagechars if !nontarpk est store Naive_OLS reg lpcnsexppk lfbraclvpk lmbraclvpk lfbrdblvpk lmbrdblvpk lfgramlvpk lmgramlvpk $covsy $villagechars if !nontar [aw = weightpk] est store Naive_weighted_OLS cmp (lpcnsexppk = lfbraclvpk lmbraclvpk lfbrdblvpk lmbrdblvpk lfgramlvpk lmgramlvpk $covsy $villagechars) (lfproglvpk = $covsf $villagechars) /// (lmproglvpk = $covsm $villagechars) [pw=weightpk], ind($cmp_cont indf indm) cluster(nh) qui est store WESML_LIML cmp (lpcnsexppk = lfbraclvpk lmbraclvpk lfbrdblvpk lmbrdblvpk lfgramlvpk lmgramlvpk $covsy _Ivill*) (lfproglvpk = $covsf _Ivill*) /// (lmproglvpk = $covsm _Ivill*) [pw=weightpk], ind($cmp_cont indf indm) cluster(nh) qui est store WESML_LIML_FE est table Naive_OLS Naive_weighted_OLS, keep(lfbraclvpk lmbraclvpk lfbrdblvpk lmbrdblvpk lfgramlvpk lmgramlvpk) t modelwidth(20) est table WESML_LIML WESML_LIML_FE, keep(lfbraclvpk lmbraclvpk lfbrdblvpk lmbrdblvpk lfgramlvpk lmgramlvpk) t modelwidth(20) foreach var of varlist $covsm _Ivill* { gen byte zm`var' = pksamplem * `var' } foreach var of varlist $covsf _Ivill* { gen byte zf`var' = pksamplef * `var' } ivreg lpcnsexppk $covsy _Ivill* (lfbraclvpk lmbraclvpk lfbrdblvpk lmbrdblvpk lfgramlvpk lmgramlvpk = z* pksample?) [pw=weightpk], cluster(nh) est store TSLS * WESML-LIML: constrain coefficients on credit vars to reported values constraint 1 [lpcnsexppk]lfbraclvpk = .0340 constraint 2 [lpcnsexppk]lmbraclvpk = -.0161 constraint 3 [lpcnsexppk]lfbrdblvpk = .0258 constraint 4 [lpcnsexppk]lmbrdblvpk = -.0155 constraint 5 [lpcnsexppk]lfgramlvpk = .0371 constraint 6 [lpcnsexppk]lmgramlvpk = -.0225 cmp (lpcnsexppk = lfbraclvpk lmbraclvpk lfbrdblvpk lmbrdblvpk lfgramlvpk lmgramlvpk $covsy $villagechars) (lfproglvpk = $covsf $villagechars) /// (lmproglvpk = $covsm $villagechars) [pw=weightpk], ind($cmp_cont indf indm) cluster(nh) constraint(1/6) qui est store WESML_LIML_PK_COEF * WESML-LIML-FE: constrain coefficients on credit vars to reported values, and constrain rho's too constraint 1 [lpcnsexppk]lfbraclvpk = .0394 constraint 2 [lpcnsexppk]lmbraclvpk = .0192 constraint 3 [lpcnsexppk]lfbrdblvpk = .0402 constraint 4 [lpcnsexppk]lmbrdblvpk = .0233 constraint 5 [lpcnsexppk]lfgramlvpk = .0432 constraint 6 [lpcnsexppk]lmgramlvpk = .0179 constraint 7 [atanhrho_12]_cons = `=atanh(-.4809)' constraint 8 [atanhrho_13]_cons = `=atanh(-.2060)' cmp (lpcnsexppk = lfbraclvpk lmbraclvpk lfbrdblvpk lmbrdblvpk lfgramlvpk lmgramlvpk $covsy _Ivill*) (lfproglvpk = $covsf _Ivill*) /// (lmproglvpk = $covsm _Ivill*) [pw=weightpk], ind($cmp_cont indf indm) cluster(nh) constraint(1/8) qui est store WESML_LIML_FE_PK_COEF * constrain all final-stage coefficients in the same for the regression restricted to target HH using Table B2 of the PK 1996 working paper * (only one with all the second-stage coef reported) cmp (lpcnsexppk = lfbraclvpk lmbraclvpk lfbrdblvpk lmbrdblvpk lfgramlvpk lmgramlvpk $covsy _Ivill*) (lfproglvpk = $covsf _Ivill*) /// (lmproglvpk = $covsm _Ivill*) [pw=weightpk] if !nontar, ind($cmp_cont indf indm) cluster(nh) tech(nr dfp) qui est store WESML_LIML_FE_TARGET constraint 1 [lpcnsexppk]lfbraclvpk = .038 constraint 2 [lpcnsexppk]lmbraclvpk = .018 constraint 3 [lpcnsexppk]lfbrdblvpk = .041 constraint 4 [lpcnsexppk]lmbrdblvpk = .024 constraint 5 [lpcnsexppk]lfgramlvpk = .044 constraint 6 [lpcnsexppk]lmgramlvpk = .018 constraint 7 [lpcnsexppk]scohead = 0.141 constraint 8 [lpcnsexppk]afedhigh = 0.029 constraint 9 [lpcnsexppk]amedhigh = 0.019 constraint 10 [lpcnsexppk]afadultd = 0.159 constraint 11 [lpcnsexppk]amadultd = -0.014 constraint 12 [lpcnsexppk]sexhead = 0.11 constraint 13 [lpcnsexppk]agehead = -0.003 constraint 14 [lpcnsexppk]llandbef = 0.015 constraint 15 [lpcnsexppk]edhead = -0.007 constraint 16 [lpcnsexppk]hdbrolnd = -0.016 constraint 17 [lpcnsexppk]hdsislnd = 0.802 constraint 18 [lpcnsexppk]hdparlnd = 0.02 constraint 19 [lpcnsexppk]spbrolnd = 0 constraint 20 [lpcnsexppk]spsislnd = 0.002 constraint 21 [lpcnsexppk]spparlnd = 0.021 constraint 22 [lpcnsexppk]_Iwave_2 = -0.021 constraint 23 [lpcnsexppk]_Iwave_3 = -0.222 constraint 24 [lpcnsexppk]crcensored = 0.059 constraint 25 [lnsig_1]_cons = `=ln(.383)' constraint 26 [atanhrho_12]_cons = `=atanh(-.464)' constraint 27 [atanhrho_13]_cons = `=atanh(-.191)' cmp (lpcnsexppk = lfbraclvpk lmbraclvpk lfbrdblvpk lmbrdblvpk lfgramlvpk lmgramlvpk $covsy _Ivill*) (lfproglvpk = $covsf _Ivill*) /// (lmproglvpk = $covsm _Ivill*) [pw=weightpk] if !nontar, ind($cmp_cont indf indm) cluster(nh) constraint(1/27) qui est store WESML_LIML_FE_TAR_PK_COEF est table WESML_LIML WESML_LIML_FE WESML_LIML_PK_COEF WESML_LIML_FE_PK_COEF WESML_LIML_FE_TARGET WESML_LIML_FE_TAR_PK_COEF, stats(ll) keep(lfbraclvpk lmbraclvpk lfbrdblvpk lmbrdblvpk lfgramlvpk lmgramlvpk) t modelwidth(20) * Now turn consumption into a binary variable and manually check log likelihood using PK 1998 appendix formulas * drop regressors for the sake of speed gen byte lpcnsexpd = lpcnsexppk>4 cmp (lpcnsexpd = lfbraclvpk lmbraclvpk lfbrdblvpk lmbrdblvpk lfgramlvpk lmgramlvpk $covsy) (lfproglvpk = $covsf) /// (lmproglvpk = $covsm) [pw=weightpk] , ind($cmp_probit indf indm) inter qui scalar rho_fy = tanh(_b[atanhrho_12:_cons]) scalar rho_my = tanh(_b[atanhrho_13:_cons]) scalar rho_fm = tanh(_b[atanhrho_23:_cons]) scalar rhotwiddle_my = (rho_my-rho_fm*rho_fy)/sqrt(1-rho_fm^2)/sqrt(1-rho_fy^2) scalar rhotwiddle_fy = (rho_fy-rho_fm*rho_my)/sqrt(1-rho_fm^2)/sqrt(1-rho_my^2) mat R = 1, rho_fm, rho_fy \ rho_fm, 1, rho_my \ rho_fy, rho_my, 1 local lnsig_1 0 // for probit eq, sig=0 mat sigma = exp(_b[lnsig_2:_cons]), exp(_b[lnsig_3:_cons]), exp(`lnsig_1') mata: st_matrix("Sigma", st_matrix("sigma") :* st_matrix("R") :* st_matrix("sigma")') scalar _f = 1 scalar _m = 2 scalar _y = 3 gen byte d_ij = lpcnsexpd*2-1 predict double XB_y, eq(lpcnsexpd) replace XB_y = XB_y * d_ij predict double e_f, eq(lfproglvpk) replace e_f = (lfproglv - e_f)/sigma[1,_f] predict double e_m, eq(lmproglvpk) replace e_m = (lmproglv - e_m)/sigma[1,_m] gen double L1 = lnnormal(XB_y) if !pksamplem & !pksamplef gen double L2 = ln(binormal(e_f, XB_y, -rho_fy*d_ij)) if !pksamplem & pksamplef & !lfproglv gen double L3 = lnnormal((XB_y + Sigma[_f,_y] /sigma[1,_y]/sigma[1,_f]^2 *d_ij*sigma[1,_f]*e_f)/sqrt(1-rho_fy^2))+lnnormalden(e_f)-ln(sigma[1,_f]) if !pksamplem & pksamplef & lfproglv gen double L4 = ln(binormal(e_m, XB_y, -rho_my*d_ij)) if !pksamplef & pksamplem & !lmproglv gen double L5 = lnnormal((XB_y + Sigma[_m,_y]/sigma[1,_y]/sigma[1,_m]^2*d_ij*sigma[1,_m]*e_m)/sqrt(1-rho_my^2))+lnnormalden(e_m)-ln(sigma[1,_m]) if !pksamplef & pksamplem & lmproglv * special code for trivariate cumulative normal gen byte sample = pksamplef & pksamplem & !lfproglv & !lmproglv gen double L6 = . mata X = d_ij = L6 = . mata R = st_matrix("R") mata R2 = R; R2[1,3]=R2[3,1]=-R[1,3]; R2[3,2]=R2[2,3]=-R[3,2] mata st_view(X, ., ("e_f", "e_m", "XB_y"), "sample") mata st_view(d_ij, ., "d_ij", "sample") mata st_view(L6, ., "L6", "sample") mata p = ghk2setup(1, 1000, 3, "halton") mata for (i=rows(X); i; i--) L6[i] = ln(ghk2(p, X[i,], d_ij[i]==1? R2 : R, 0, .)) gen double L7 = ln(binormal((e_m-rho_fm*e_f)/sqrt(1-rho_fm^2),(XB_y+rho_fy*e_f*d_ij)/sqrt(1-rho_fy^2), -rhotwiddle_my*d_ij)) /// + lnnormalden(e_f) - ln(sigma[1,_f]) if pksamplem & lfproglv & pksamplef & !lmproglv gen double L8 = ln(binormal((e_f-rho_fm*e_m)/sqrt(1-rho_fm^2),(XB_y+rho_my*e_m*d_ij)/sqrt(1-rho_my^2), -rhotwiddle_fy*d_ij)) /// + lnnormalden(e_m) - ln(sigma[1,_m]) if pksamplef & lmproglv & pksamplem & !lfproglv // "L9" is missing from the appendix--when both genders borrow. So just leave out of this comparison. egen double ll = rowtotal(L1 L2 L3 L4 L5 L6 L7 L8) * manually computed and cmp observation-level log likelihoods match: scatter ll _cmp_lnfi if e(sample) & (!lfproglvpk | !lmproglvpk)