Pooled DHS data dictionary

Getting started

First we load the required packages.

# Attach all packages needed by this document in a single call.
easypackages::libraries(
  # Data i/o
  "here",                 # relative file path
  "rio",                  # file import-export
  
  # Data manipulation
  "janitor",              # data cleaning fns
  "haven",                # stata, sas, spss data io
  "labelled",             # var labelling
  "readxl",               # excel sheets
  # "scales",               # to change formats and units
  "skimr",                # quick data summary
  "broom",                # view model results
  
  # Analysis output
  "gt",                   # presentation-ready tables
  # "modelsummary",          # output summary tables
  "gtsummary",            # output summary tables
  "flextable",            # creating tables from objects
  "officer",              # editing in office docs
  
  # R graph related packages
  "ggstats",              # ggplot2 extensions for statistical plots
  "RColorBrewer",         # colour palettes
  # "scales",
  "patchwork",            # combining multiple plots
  
  # Misc packages
  "tidyverse",            # Data manipulation iron man
  "tictoc"                # Code timing
)

Next we turn off scientific notation.

# A large scipen penalty makes R prefer fixed over scientific notation when printing numbers.
options(scipen = 999)

Next we set the default gtsummary print engine for tables.

# Print gtsummary tables through the flextable engine by default.
theme_gtsummary_printer(print_engine = "flextable")

Now we set the flextable output defaults.

# Global look-and-feel applied to flextables created after this call.
set_flextable_defaults(
  # Overall theme and layout
  theme_fun = theme_vanilla,
  table.layout = "autofit",
  background.color = "white",

  # Text appearance
  font.size = 11,
  text.align = "left",

  # Number formatting: no thousands separator
  big.mark = ""
)

Document introduction

Here we document the variable labels and other contents of all variables across the pooled Demographic and Health Survey (DHS) birth history datasets of South Asian countries. This will be the primary document for checking which variables will be used for analysis in the PhD research. It will also guide us on variable availability in the pooled datasets of the various South Asian countries.

This document contains variable details for the following South Asian DHS surveys:

# Summary of the pooled survey datasets: one row per country, giving its
# code, name, number of rows (births) and number of columns (variables).
sasiadhs_tbl <- tribble(
  ~ccode, ~country,     ~n_sample,     ~n_variables,
  1L,     "Bangladesh", nrow(bdbr_pl), ncol(bdbr_pl),
  2L,     "Nepal",      nrow(npbr_pl), ncol(npbr_pl)
)

# Display the survey summary as a left-aligned flextable
sasiadhs_tbl |>
  # add thousands separators to the sample sizes (column becomes character)
  mutate(n_sample = prettyNum(n_sample, big.mark = ",")) |>
  # build the flextable, then left-align header and body alike
  qflextable() |>
  align(part = "all", align = "left")
Table 1: South Asian DHS datasets and their sample size to be used for pooling

ccode

country

n_sample

n_variables

1

Bangladesh

349,962

120

2

Nepal

161,330

124

Here we will create a basic data dictionary for each dataset using the labelled package. In our earlier efforts we realized that the method for generating the data dictionary is similar for every dataset. Hence, we will create a function using the look_for() command to generate the data dictionaries and view them.

Bangladesh DHS pooled data dictionary

Here we check the variable details of the Bangladesh birth history recode (BR) pooled dataset.

svy_name

variable

label

col_type

missing

bdbr

ctr_name

Country name

chr

0

bdbr

ctr_code

Country code

dbl

0

bdbr

year

Survey year

dbl

0

bdbr

uniqueid

Unique birth ID

chr

0

bdbr

commid

Community ID

chr

0

bdbr

hhid

Household ID

chr

0

bdbr

motherid

Mother ID

chr

0

bdbr

caseid

case identification

chr

0

bdbr

bidx

birth column number

dbl

0

bdbr

v000

country code and phase

chr

0

bdbr

v001

cluster number

dbl

0

bdbr

v002

household number

dbl

0

bdbr

v003

respondent's line number

dbl

0

bdbr

v004

ultimate area unit

dbl

0

bdbr

v005

sample weight

dbl

0

bdbr

svy_unf

Un-normalization factor

dbl

0

bdbr

svy_wwgt6

Women's weight un-normalized

dbl

0

bdbr

svy_psu

primary sampling unit

chr

0

bdbr

svy_strata

sample strata

chr

0

bdbr

v006

month of interview

dbl

0

bdbr

v007

year of interview

dbl+lbl

0

bdbr

v008

date of interview (cmc)

dbl

0

bdbr

v009

respondent's month of birth

dbl

0

bdbr

v010

respondent's year of birth

dbl

0

bdbr

v011

date of birth (cmc)

dbl

0

bdbr

v012

current age - respondent

dbl

0

bdbr

v021

primary sampling unit

dbl

0

bdbr

v022

sample strata

dbl+lbl

0

bdbr

v025

type of place of residence

dbl+lbl

0

bdbr

v106

highest educational level

dbl+lbl

0

bdbr

v107

highest year of education

dbl+lbl

124078

bdbr

v151

sex of household head

dbl+lbl

0

bdbr

v152

age of household head

dbl+lbl

13

bdbr

v201

total children ever born

dbl

0

bdbr

v209

births in past year

dbl+lbl

0

bdbr

bord

birth order number

dbl

0

bdbr

b0

child is twin

dbl+lbl

0

bdbr

b1

month of birth

dbl

0

bdbr

b2

year of birth

dbl+lbl

0

bdbr

b3

date of birth (cmc)

dbl

0

bdbr

b4

sex of child

dbl+lbl

0

bdbr

b5

child is alive

dbl+lbl

0

bdbr

b6

age at death

dbl+lbl

312326

bdbr

b7

age at death (months-imputed)

dbl

312306

bdbr

b8

current age of child

dbl

37656

bdbr

b9

child lives with whom

dbl+lbl

37656

bdbr

b10

completeness of information

dbl+lbl

0

bdbr

b11

preceding birth interval

dbl

119918

bdbr

b12

succeeding birth interval

dbl

121849

bdbr

b13

flag for age at death

dbl+lbl

312306

bdbr

str_v001

chr

0

bdbr

str_v002

chr

0

bdbr

str_v003

chr

0

bdbr

str_bord

chr

0

bdbr

str_v021

chr

0

bdbr

str_v022

chr

0

bdbr

cons

constant == 1

dbl

0

bdbr

agecon

Age at conception of child (cmc)

dbl

0

bdbr

agemort

Age at death of child (cmc)

dbl

312306

bdbr

lag_b5

Prev child survival status

dbl+lbl

119918

bdbr

lag_agemort

Prev child age at mortality

dbl

315583

bdbr

sibsurv_all

Death scarring

fct

119918

bdbr

sibsurv_con

Death scarring

fct

119918

bdbr

sibsurv_3c

Death scarring

fct

119918

bdbr

sibsurv_all_nmv

Death scarring

fct

0

bdbr

sibsurv_con_nmv

Death scarring

fct

0

bdbr

sibsurv_3c_nmv

Death scarring

fct

0

bdbr

binterval_3c

Preceding birth interval (in months)

fct

119918

bdbr

binterval_3c_opp

Preceding birth interval (in months)

fct

119918

bdbr

binterval_6c

Preceding birth interval (in months)

fct

119918

bdbr

binterval_6c_opp

Preceding birth interval (in months)

fct

119918

bdbr

binterval_3c_nmv

Preceding birth interval (in months)

fct

0

bdbr

binterval_3c_nmv_opp

Preceding birth interval (in months)

fct

0

bdbr

binterval_6c_nmv

Preceding birth interval (in months)

fct

0

bdbr

binterval_6c_nmv_opp

Preceding birth interval (in months)

fct

0

bdbr

bord_c

Birth order

fct

0

bdbr

sex_mf

Sex of child

fct

0

bdbr

sex_fm

Sex of child

fct

0

bdbr

medu

Mother's education

fct

0

bdbr

medu_opp

Mother's education

fct

0

bdbr

macb

Mother's age at child birth (in years)

dbl

0

bdbr

macb_c

Mother's age at child birth (in years)

fct

0

bdbr

macb_c_opp

Mother's age at child birth (in years)

fct

0

bdbr

marital

Mother's marital status

fct

0

bdbr

pedu

Father's education

fct

0

bdbr

pedu_opp

Father's education

fct

0

bdbr

por

Place of residence

fct

0

bdbr

por_opp

Place of residence

fct

0

bdbr

head_sex_mf

Sex of household head

fct

0

bdbr

head_sex_fm

Sex of household head

fct

0

bdbr

head_age

Age of household head

fct

0

bdbr

head_age_opp

Age of household head

fct

0

bdbr

season

Season during birth

fct

0

bdbr

religion

Religion of the household

fct

26

bdbr

yoi_v007

Year of interview

dbl+lbl

0

bdbr

yob_v010

Mother's year of birth

dbl

0

bdbr

yob_b2

Child's year of birth

dbl

0

bdbr

cyob10y

Birth cohort of index child

fct

0

bdbr

cyob10y_opp

Birth cohort of index child

fct

0

bdbr

cyob5y

Birth cohort of index child

fct

0

bdbr

cyob5y_opp

Birth cohort of index child

fct

0

bdbr

myob10y

Mother's year of birth

fct

0

bdbr

myob10y_opp

Mother's year of birth

fct

0

bdbr

wi_score

Wealth index score

dbl

0

bdbr

wi_qt

Wealth index quintile

fct

0

bdbr

wi_qt_opp

Wealth index quintile

fct

0

bdbr

hhsize

Household size

int

0

bdbr

hhsize_dejure

Num of de jure residents

dbl

0

bdbr

hhsize_defacto

Num of de facto residents

dbl

0

bdbr

hhsize_c

Household size

fct

0

bdbr

hhstruc

Household structure

fct

0

bdbr

hhstruc_opp

Household structure

fct

0

bdbr

hhgen_num

Generations in household

dbl

0

bdbr

hhgen_3c

Generations in household

fct

0

bdbr

hhgen_2c

Generations in household

fct

0

bdbr

hhgen_2c_opp

Generations in household

fct

0

bdbr

infantd_01

Infant mortality

dbl

0

bdbr

infantd

Infant mortality

fct

0

bdbr

under5d_01

Under-five mortality

dbl

0

bdbr

under5d

Under-five mortality

fct

0

Nepal DHS pooled data dictionary

Here we check the variable details of the Nepal birth history recode (BR) pooled dataset.

svy_name

variable

label

col_type

missing

npbr

ctr_name

Country name

chr

0

npbr

ctr_code

Country code

dbl

0

npbr

year

Survey year

dbl

0

npbr

uniqueid

Unique birth ID

chr

0

npbr

commid

Community ID

chr

0

npbr

hhid

Household ID

chr

0

npbr

motherid

Mother ID

chr

0

npbr

caseid

case identification

chr

0

npbr

bidx

birth column number

dbl

0

npbr

v000

country code and phase

chr

0

npbr

v001

cluster number

dbl

0

npbr

v002

household number

dbl

0

npbr

v003

respondent's line number

dbl

0

npbr

v004

ultimate area unit

dbl

0

npbr

v005

sample weight

dbl

0

npbr

svy_unf

Un-normalization factor

dbl

0

npbr

svy_wwgt6

Women's weight un-normalized

dbl

0

npbr

svy_psu

primary sampling unit

chr

0

npbr

svy_strata

sample strata

chr

0

npbr

v006

month of interview

dbl+lbl

0

npbr

v007

year of interview

dbl

0

npbr

v008

date of interview (cmc)

dbl

0

npbr

v009

respondent's month of birth

dbl+lbl

0

npbr

v010

respondent's year of birth

dbl+lbl

0

npbr

v011

date of birth (cmc)

dbl

0

npbr

v012

current age - respondent

dbl

0

npbr

v021

primary sampling unit

dbl

0

npbr

v022

sample strata for sampling errors

dbl+lbl

0

npbr

v025

type of place of residence

dbl+lbl

0

npbr

v106

highest educational level

dbl+lbl

0

npbr

v107

highest year of education

dbl+lbl

92582

npbr

v151

sex of household head

dbl+lbl

0

npbr

v152

age of household head

dbl+lbl

0

npbr

v201

total children ever born

dbl

0

npbr

v209

births in past year

dbl+lbl

0

npbr

bord

birth order number

dbl

0

npbr

b0

child is twin

dbl+lbl

0

npbr

b1

month of birth

dbl+lbl

0

npbr

b2

year of birth

dbl

0

npbr

b3

date of birth (cmc)

dbl

0

npbr

b4

sex of child

dbl+lbl

0

npbr

b5

child is alive

dbl+lbl

0

npbr

b6

age at death

dbl+lbl

142535

npbr

b7

age at death (months-imputed)

dbl

142533

npbr

b8

current age of child

dbl

18797

npbr

b9

who child lives with

dbl+lbl

18797

npbr

b10

completeness of information

dbl+lbl

0

npbr

b11

preceding birth interval

dbl

50836

npbr

b12

succeeding birth interval

dbl

51315

npbr

b13

flag for age at death

dbl+lbl

142533

npbr

str_v001

chr

0

npbr

str_v002

chr

0

npbr

str_v003

chr

0

npbr

str_bord

chr

0

npbr

str_v021

chr

0

npbr

str_v022

chr

0

npbr

cons

constant == 1

dbl

0

npbr

agecon

Age at conception of child (cmc)

dbl

0

npbr

agemort

Age at death of child (cmc)

dbl

142533

npbr

lag_b5

Prev child survival status

dbl+lbl

50836

npbr

lag_agemort

Prev child age at mortality

dbl

144071

npbr

sibsurv_all

Death scarring

fct

50836

npbr

sibsurv_con

Death scarring

fct

50836

npbr

sibsurv_3c

Death scarring

fct

50836

npbr

sibsurv_all_nmv

Death scarring

fct

0

npbr

sibsurv_con_nmv

Death scarring

fct

0

npbr

sibsurv_3c_nmv

Death scarring

fct

0

npbr

binterval_3c

Preceding birth interval (in months)

fct

50836

npbr

binterval_3c_opp

Preceding birth interval (in months)

fct

50836

npbr

binterval_6c

Preceding birth interval (in months)

fct

50836

npbr

binterval_6c_opp

Preceding birth interval (in months)

fct

50836

npbr

binterval_3c_nmv

Preceding birth interval (in months)

fct

0

npbr

binterval_3c_nmv_opp

Preceding birth interval (in months)

fct

0

npbr

binterval_6c_nmv

Preceding birth interval (in months)

fct

0

npbr

binterval_6c_nmv_opp

Preceding birth interval (in months)

fct

0

npbr

bord_c

Birth order

fct

0

npbr

sex_mf

Sex of child

fct

0

npbr

sex_fm

Sex of child

fct

0

npbr

medu

Mother's education

fct

0

npbr

medu_opp

Mother's education

fct

0

npbr

macb

Mother's age at child birth (in years)

dbl

0

npbr

macb_c

Mother's age at child birth (in years)

fct

0

npbr

macb_c_opp

Mother's age at child birth (in years)

fct

0

npbr

marital

Mother's marital status

fct

0

npbr

pedu

Father's education

fct

0

npbr

pedu_opp

Father's education

fct

0

npbr

por

Place of residence

fct

0

npbr

por_opp

Place of residence

fct

0

npbr

head_sex

Sex of household head

fct

0

npbr

head_sex_opp

Sex of household head

fct

0

npbr

head_age

Age of household head

fct

0

npbr

head_age_opp

Age of household head

fct

0

npbr

birth_month

Month of birth

dbl

0

npbr

season

Season during birth

fct

0

npbr

season_wrong

Season during birth

fct

0

npbr

religion

Religion of the household

fct

0

npbr

nat_lang

Native language of the household

fct

0

npbr

yoi_v007

Year of interview

dbl

0

npbr

yob_v010

Mother's year of birth

dbl

0

npbr

yob_b2

Child's year of birth

dbl

0

npbr

cyob10y

Birth cohort of index child

fct

0

npbr

cyob10y_opp

Birth cohort of index child

fct

0

npbr

cyob5y

Birth cohort of index child

fct

0

npbr

cyob5y_opp

Birth cohort of index child

fct

0

npbr

myob10y

Mother's year of birth

fct

0

npbr

myob10y_opp

Mother's year of birth

fct

0

npbr

ecoreg

Ecological region

fct

0

npbr

wi_score

Wealth index score

dbl

0

npbr

wi_qt

Wealth index quintile

fct

0

npbr

wi_qt_opp

Wealth index quintile

fct

0

npbr

hhsize

Household size

int

0

npbr

hhsize_dejure

Num of de jure residents

dbl

0

npbr

hhsize_defacto

Num of de facto residents

dbl

0

npbr

hhsize_c

Household size

fct

0

npbr

hhstruc

Household structure

fct

0

npbr

hhstruc_opp

Household structure

fct

0

npbr

hhgen_num

Generations in household

dbl

0

npbr

hhgen_3c

Generations in household

fct

0

npbr

hhgen_2c

Generations in household

fct

0

npbr

hhgen_2c_opp

Generations in household

fct

0

npbr

infantd_01

Infant mortality

dbl

0

npbr

infantd

Infant mortality

fct

0

npbr

under5d_01

Under-five mortality

dbl

0

npbr

under5d

Under-five mortality

fct

0

Back to top