# Load (and, if missing, offer to install) every package used in this
# document in a single call via easypackages::libraries().
easypackages::libraries(
  # Data i/o
  "here", # relative file path
  "rio", # file import-export
  # Data manipulation
  "janitor", # data cleaning fns
  "haven", # stata, sas, spss data io
  "labelled", # var labelling
  "readxl", # excel sheets
  # "scales", # to change formats and units
  "skimr", # quick data summary
  "broom", # view model results
  # Analysis output
  "gt",
  # "modelsummary", # output summary tables
  "gtsummary", # output summary tables
  "flextable", # creating tables from objects
  "officer", # editing in office docs
  # R graph related packages
  "ggstats",
  "RColorBrewer",
  # "scales",
  "patchwork",
  # Misc packages
  "tidyverse", # Data manipulation iron man
  "tictoc" # Code timing
)
Pooled DHS data dictionary
Getting started
First we load the required packages.
Next we turn off scientific notation.
# A large positive scipen penalty biases number printing towards fixed
# notation, so values are not shown in scientific (exponential) form.
options(scipen = 999)
Next we set the default gtsummary print engine for tables.
# Use flextable as the default print engine for gtsummary tables.
theme_gtsummary_printer(print_engine = "flextable")
Now we set the flextable output defaults.
# Session-wide formatting defaults applied to flextable objects created
# after this call.
set_flextable_defaults(
font.size = 11,
text.align = "left",
big.mark = "", # empty string: no thousands separator
background.color = "white",
table.layout = "autofit",
theme_fun = theme_vanilla
)
Document introduction
We document the variable labels and other contents of all variables across all the pooled Demographic and Health Survey (DHS) birth history datasets of South Asian countries here. This will be the primary document for viewing what variables will be used for analysis in the PhD research. This document will guide us on the variable availability in the pooled datasets of the various South Asian countries.
This document contains variable details for the following pooled DHS surveys (Bangladesh and Nepal):
# Creating the table of pooled surveys to be used for analysis.
# One row per country: a numeric code, the country name, and the number of
# rows and columns of its pooled dataset.
# NOTE(review): bdbr_pl and npbr_pl are not defined in this section --
# presumably the pooled Bangladesh and Nepal datasets loaded earlier; confirm.
sasiadhs_tbl <- tibble(
  ccode = c(1:2),
  country = c("Bangladesh", "Nepal"),
  n_sample = c(nrow(bdbr_pl), nrow(npbr_pl)),
  n_variables = c(ncol(bdbr_pl), ncol(npbr_pl))
)
# Convert and view as flextable
sasiadhs_tbl |>
  # show sample sizes with a comma as thousands separator (becomes character)
  mutate(n_sample = prettyNum(n_sample, big.mark = ",")) |>
  # convert to flextable object
  qflextable() |>
  # left-align every part of the table (part = "all")
  align(align = "left", part = "all")
ccode | country | n_sample | n_variables |
---|---|---|---|
1 | Bangladesh | 349,962 | 120 |
2 | Nepal | 161,330 | 124 |
Here we will create basic data dictionaries of each dataset using the labelled package. In our earlier efforts we realized that the method for generating data dictionaries for each dataset is similar. Hence, we will create a function using the look_for() command to generate the data dictionaries and view them.
Bangladesh DHS pooled data dictionary
Here we check the variable details of the Bangladesh birth history recode (BR) pooled dataset.
svy_name | variable | label | col_type | missing |
---|---|---|---|---|
bdbr | ctr_name | Country name | chr | 0 |
bdbr | ctr_code | Country code | dbl | 0 |
bdbr | year | Survey year | dbl | 0 |
bdbr | uniqueid | Unique birth ID | chr | 0 |
bdbr | commid | Community ID | chr | 0 |
bdbr | hhid | Household ID | chr | 0 |
bdbr | motherid | Mother ID | chr | 0 |
bdbr | caseid | case identification | chr | 0 |
bdbr | bidx | birth column number | dbl | 0 |
bdbr | v000 | country code and phase | chr | 0 |
bdbr | v001 | cluster number | dbl | 0 |
bdbr | v002 | household number | dbl | 0 |
bdbr | v003 | respondent's line number | dbl | 0 |
bdbr | v004 | ultimate area unit | dbl | 0 |
bdbr | v005 | sample weight | dbl | 0 |
bdbr | svy_unf | Un-normalization factor | dbl | 0 |
bdbr | svy_wwgt6 | Women's weight un-normalized | dbl | 0 |
bdbr | svy_psu | primary sampling unit | chr | 0 |
bdbr | svy_strata | sample strata | chr | 0 |
bdbr | v006 | month of interview | dbl | 0 |
bdbr | v007 | year of interview | dbl+lbl | 0 |
bdbr | v008 | date of interview (cmc) | dbl | 0 |
bdbr | v009 | respondent's month of birth | dbl | 0 |
bdbr | v010 | respondent's year of birth | dbl | 0 |
bdbr | v011 | date of birth (cmc) | dbl | 0 |
bdbr | v012 | current age - respondent | dbl | 0 |
bdbr | v021 | primary sampling unit | dbl | 0 |
bdbr | v022 | sample strata | dbl+lbl | 0 |
bdbr | v025 | type of place of residence | dbl+lbl | 0 |
bdbr | v106 | highest educational level | dbl+lbl | 0 |
bdbr | v107 | highest year of education | dbl+lbl | 124078 |
bdbr | v151 | sex of household head | dbl+lbl | 0 |
bdbr | v152 | age of household head | dbl+lbl | 13 |
bdbr | v201 | total children ever born | dbl | 0 |
bdbr | v209 | births in past year | dbl+lbl | 0 |
bdbr | bord | birth order number | dbl | 0 |
bdbr | b0 | child is twin | dbl+lbl | 0 |
bdbr | b1 | month of birth | dbl | 0 |
bdbr | b2 | year of birth | dbl+lbl | 0 |
bdbr | b3 | date of birth (cmc) | dbl | 0 |
bdbr | b4 | sex of child | dbl+lbl | 0 |
bdbr | b5 | child is alive | dbl+lbl | 0 |
bdbr | b6 | age at death | dbl+lbl | 312326 |
bdbr | b7 | age at death (months-imputed) | dbl | 312306 |
bdbr | b8 | current age of child | dbl | 37656 |
bdbr | b9 | child lives with whom | dbl+lbl | 37656 |
bdbr | b10 | completeness of information | dbl+lbl | 0 |
bdbr | b11 | preceding birth interval | dbl | 119918 |
bdbr | b12 | succeeding birth interval | dbl | 121849 |
bdbr | b13 | flag for age at death | dbl+lbl | 312306 |
bdbr | str_v001 | | chr | 0 |
bdbr | str_v002 | | chr | 0 |
bdbr | str_v003 | | chr | 0 |
bdbr | str_bord | | chr | 0 |
bdbr | str_v021 | | chr | 0 |
bdbr | str_v022 | | chr | 0 |
bdbr | cons | constant == 1 | dbl | 0 |
bdbr | agecon | Age at conception of child (cmc) | dbl | 0 |
bdbr | agemort | Age at death of child (cmc) | dbl | 312306 |
bdbr | lag_b5 | Prev child survival status | dbl+lbl | 119918 |
bdbr | lag_agemort | Prev child age at mortality | dbl | 315583 |
bdbr | sibsurv_all | Death scarring | fct | 119918 |
bdbr | sibsurv_con | Death scarring | fct | 119918 |
bdbr | sibsurv_3c | Death scarring | fct | 119918 |
bdbr | sibsurv_all_nmv | Death scarring | fct | 0 |
bdbr | sibsurv_con_nmv | Death scarring | fct | 0 |
bdbr | sibsurv_3c_nmv | Death scarring | fct | 0 |
bdbr | binterval_3c | Preceding birth interval (in months) | fct | 119918 |
bdbr | binterval_3c_opp | Preceding birth interval (in months) | fct | 119918 |
bdbr | binterval_6c | Preceding birth interval (in months) | fct | 119918 |
bdbr | binterval_6c_opp | Preceding birth interval (in months) | fct | 119918 |
bdbr | binterval_3c_nmv | Preceding birth interval (in months) | fct | 0 |
bdbr | binterval_3c_nmv_opp | Preceding birth interval (in months) | fct | 0 |
bdbr | binterval_6c_nmv | Preceding birth interval (in months) | fct | 0 |
bdbr | binterval_6c_nmv_opp | Preceding birth interval (in months) | fct | 0 |
bdbr | bord_c | Birth order | fct | 0 |
bdbr | sex_mf | Sex of child | fct | 0 |
bdbr | sex_fm | Sex of child | fct | 0 |
bdbr | medu | Mother's education | fct | 0 |
bdbr | medu_opp | Mother's education | fct | 0 |
bdbr | macb | Mother's age at child birth (in years) | dbl | 0 |
bdbr | macb_c | Mother's age at child birth (in years) | fct | 0 |
bdbr | macb_c_opp | Mother's age at child birth (in years) | fct | 0 |
bdbr | marital | Mother's marital status | fct | 0 |
bdbr | pedu | Father's education | fct | 0 |
bdbr | pedu_opp | Father's education | fct | 0 |
bdbr | por | Place of residence | fct | 0 |
bdbr | por_opp | Place of residence | fct | 0 |
bdbr | head_sex_mf | Sex of household head | fct | 0 |
bdbr | head_sex_fm | Sex of household head | fct | 0 |
bdbr | head_age | Age of household head | fct | 0 |
bdbr | head_age_opp | Age of household head | fct | 0 |
bdbr | season | Season during birth | fct | 0 |
bdbr | religion | Religion of the household | fct | 26 |
bdbr | yoi_v007 | Year of interview | dbl+lbl | 0 |
bdbr | yob_v010 | Mother's year of birth | dbl | 0 |
bdbr | yob_b2 | Child's year of birth | dbl | 0 |
bdbr | cyob10y | Birth cohort of index child | fct | 0 |
bdbr | cyob10y_opp | Birth cohort of index child | fct | 0 |
bdbr | cyob5y | Birth cohort of index child | fct | 0 |
bdbr | cyob5y_opp | Birth cohort of index child | fct | 0 |
bdbr | myob10y | Mother's year of birth | fct | 0 |
bdbr | myob10y_opp | Mother's year of birth | fct | 0 |
bdbr | wi_score | Wealth index score | dbl | 0 |
bdbr | wi_qt | Wealth index quintile | fct | 0 |
bdbr | wi_qt_opp | Wealth index quintile | fct | 0 |
bdbr | hhsize | Household size | int | 0 |
bdbr | hhsize_dejure | Num of de jure residents | dbl | 0 |
bdbr | hhsize_defacto | Num of de facto residents | dbl | 0 |
bdbr | hhsize_c | Household size | fct | 0 |
bdbr | hhstruc | Household structure | fct | 0 |
bdbr | hhstruc_opp | Household structure | fct | 0 |
bdbr | hhgen_num | Generations in household | dbl | 0 |
bdbr | hhgen_3c | Generations in household | fct | 0 |
bdbr | hhgen_2c | Generations in household | fct | 0 |
bdbr | hhgen_2c_opp | Generations in household | fct | 0 |
bdbr | infantd_01 | Infant mortality | dbl | 0 |
bdbr | infantd | Infant mortality | fct | 0 |
bdbr | under5d_01 | Under-five mortality | dbl | 0 |
bdbr | under5d | Under-five mortality | fct | 0 |
Nepal DHS pooled data dictionary
Here we check the variable details of the Nepal birth history recode (BR) pooled dataset.
svy_name | variable | label | col_type | missing |
---|---|---|---|---|
npbr | ctr_name | Country name | chr | 0 |
npbr | ctr_code | Country code | dbl | 0 |
npbr | year | Survey year | dbl | 0 |
npbr | uniqueid | Unique birth ID | chr | 0 |
npbr | commid | Community ID | chr | 0 |
npbr | hhid | Household ID | chr | 0 |
npbr | motherid | Mother ID | chr | 0 |
npbr | caseid | case identification | chr | 0 |
npbr | bidx | birth column number | dbl | 0 |
npbr | v000 | country code and phase | chr | 0 |
npbr | v001 | cluster number | dbl | 0 |
npbr | v002 | household number | dbl | 0 |
npbr | v003 | respondent's line number | dbl | 0 |
npbr | v004 | ultimate area unit | dbl | 0 |
npbr | v005 | sample weight | dbl | 0 |
npbr | svy_unf | Un-normalization factor | dbl | 0 |
npbr | svy_wwgt6 | Women's weight un-normalized | dbl | 0 |
npbr | svy_psu | primary sampling unit | chr | 0 |
npbr | svy_strata | sample strata | chr | 0 |
npbr | v006 | month of interview | dbl+lbl | 0 |
npbr | v007 | year of interview | dbl | 0 |
npbr | v008 | date of interview (cmc) | dbl | 0 |
npbr | v009 | respondent's month of birth | dbl+lbl | 0 |
npbr | v010 | respondent's year of birth | dbl+lbl | 0 |
npbr | v011 | date of birth (cmc) | dbl | 0 |
npbr | v012 | current age - respondent | dbl | 0 |
npbr | v021 | primary sampling unit | dbl | 0 |
npbr | v022 | sample strata for sampling errors | dbl+lbl | 0 |
npbr | v025 | type of place of residence | dbl+lbl | 0 |
npbr | v106 | highest educational level | dbl+lbl | 0 |
npbr | v107 | highest year of education | dbl+lbl | 92582 |
npbr | v151 | sex of household head | dbl+lbl | 0 |
npbr | v152 | age of household head | dbl+lbl | 0 |
npbr | v201 | total children ever born | dbl | 0 |
npbr | v209 | births in past year | dbl+lbl | 0 |
npbr | bord | birth order number | dbl | 0 |
npbr | b0 | child is twin | dbl+lbl | 0 |
npbr | b1 | month of birth | dbl+lbl | 0 |
npbr | b2 | year of birth | dbl | 0 |
npbr | b3 | date of birth (cmc) | dbl | 0 |
npbr | b4 | sex of child | dbl+lbl | 0 |
npbr | b5 | child is alive | dbl+lbl | 0 |
npbr | b6 | age at death | dbl+lbl | 142535 |
npbr | b7 | age at death (months-imputed) | dbl | 142533 |
npbr | b8 | current age of child | dbl | 18797 |
npbr | b9 | who child lives with | dbl+lbl | 18797 |
npbr | b10 | completeness of information | dbl+lbl | 0 |
npbr | b11 | preceding birth interval | dbl | 50836 |
npbr | b12 | succeeding birth interval | dbl | 51315 |
npbr | b13 | flag for age at death | dbl+lbl | 142533 |
npbr | str_v001 | | chr | 0 |
npbr | str_v002 | | chr | 0 |
npbr | str_v003 | | chr | 0 |
npbr | str_bord | | chr | 0 |
npbr | str_v021 | | chr | 0 |
npbr | str_v022 | | chr | 0 |
npbr | cons | constant == 1 | dbl | 0 |
npbr | agecon | Age at conception of child (cmc) | dbl | 0 |
npbr | agemort | Age at death of child (cmc) | dbl | 142533 |
npbr | lag_b5 | Prev child survival status | dbl+lbl | 50836 |
npbr | lag_agemort | Prev child age at mortality | dbl | 144071 |
npbr | sibsurv_all | Death scarring | fct | 50836 |
npbr | sibsurv_con | Death scarring | fct | 50836 |
npbr | sibsurv_3c | Death scarring | fct | 50836 |
npbr | sibsurv_all_nmv | Death scarring | fct | 0 |
npbr | sibsurv_con_nmv | Death scarring | fct | 0 |
npbr | sibsurv_3c_nmv | Death scarring | fct | 0 |
npbr | binterval_3c | Preceding birth interval (in months) | fct | 50836 |
npbr | binterval_3c_opp | Preceding birth interval (in months) | fct | 50836 |
npbr | binterval_6c | Preceding birth interval (in months) | fct | 50836 |
npbr | binterval_6c_opp | Preceding birth interval (in months) | fct | 50836 |
npbr | binterval_3c_nmv | Preceding birth interval (in months) | fct | 0 |
npbr | binterval_3c_nmv_opp | Preceding birth interval (in months) | fct | 0 |
npbr | binterval_6c_nmv | Preceding birth interval (in months) | fct | 0 |
npbr | binterval_6c_nmv_opp | Preceding birth interval (in months) | fct | 0 |
npbr | bord_c | Birth order | fct | 0 |
npbr | sex_mf | Sex of child | fct | 0 |
npbr | sex_fm | Sex of child | fct | 0 |
npbr | medu | Mother's education | fct | 0 |
npbr | medu_opp | Mother's education | fct | 0 |
npbr | macb | Mother's age at child birth (in years) | dbl | 0 |
npbr | macb_c | Mother's age at child birth (in years) | fct | 0 |
npbr | macb_c_opp | Mother's age at child birth (in years) | fct | 0 |
npbr | marital | Mother's marital status | fct | 0 |
npbr | pedu | Father's education | fct | 0 |
npbr | pedu_opp | Father's education | fct | 0 |
npbr | por | Place of residence | fct | 0 |
npbr | por_opp | Place of residence | fct | 0 |
npbr | head_sex | Sex of household head | fct | 0 |
npbr | head_sex_opp | Sex of household head | fct | 0 |
npbr | head_age | Age of household head | fct | 0 |
npbr | head_age_opp | Age of household head | fct | 0 |
npbr | birth_month | Month of birth | dbl | 0 |
npbr | season | Season during birth | fct | 0 |
npbr | season_wrong | Season during birth | fct | 0 |
npbr | religion | Religion of the household | fct | 0 |
npbr | nat_lang | Native language of the household | fct | 0 |
npbr | yoi_v007 | Year of interview | dbl | 0 |
npbr | yob_v010 | Mother's year of birth | dbl | 0 |
npbr | yob_b2 | Child's year of birth | dbl | 0 |
npbr | cyob10y | Birth cohort of index child | fct | 0 |
npbr | cyob10y_opp | Birth cohort of index child | fct | 0 |
npbr | cyob5y | Birth cohort of index child | fct | 0 |
npbr | cyob5y_opp | Birth cohort of index child | fct | 0 |
npbr | myob10y | Mother's year of birth | fct | 0 |
npbr | myob10y_opp | Mother's year of birth | fct | 0 |
npbr | ecoreg | Ecological region | fct | 0 |
npbr | wi_score | Wealth index score | dbl | 0 |
npbr | wi_qt | Wealth index quintile | fct | 0 |
npbr | wi_qt_opp | Wealth index quintile | fct | 0 |
npbr | hhsize | Household size | int | 0 |
npbr | hhsize_dejure | Num of de jure residents | dbl | 0 |
npbr | hhsize_defacto | Num of de facto residents | dbl | 0 |
npbr | hhsize_c | Household size | fct | 0 |
npbr | hhstruc | Household structure | fct | 0 |
npbr | hhstruc_opp | Household structure | fct | 0 |
npbr | hhgen_num | Generations in household | dbl | 0 |
npbr | hhgen_3c | Generations in household | fct | 0 |
npbr | hhgen_2c | Generations in household | fct | 0 |
npbr | hhgen_2c_opp | Generations in household | fct | 0 |
npbr | infantd_01 | Infant mortality | dbl | 0 |
npbr | infantd | Infant mortality | fct | 0 |
npbr | under5d_01 | Under-five mortality | dbl | 0 |
npbr | under5d | Under-five mortality | fct | 0 |