The goal of pipload
is to provide a series of tools to load the PIP microdata into memory. You can load and update the inventory of PIP microdata, as well as find the most recent version of each country-year-survey data point.
Installation
You can install the development version from GitHub with:
# install.packages("devtools")
devtools::install_github("PIP-Technical-Team/pipload")
Example
Load library
Microdata
Load two datasets for Paraguay for the Poverty Calculator tool:
# Find the data available.
df <- pip_find_data(
country = "PRY",
year = c(2017, 2018),
tool = "PC"
)
#> Warning: `pip_find_data()` was deprecated in pipload 0.1.13.
#> Please use `pip_find_dlw()` instead.
df$filename
#> [1] "PRY_2017_EPH_V01_M_V02_A_PIP_PC-GPWG.dta"
#> [2] "PRY_2017_EPH_V01_M_V03_A_PIP_PC-GPWG.dta"
#> [3] "PRY_2018_EPH_V01_M_V03_A_PIP_PC-GPWG.dta"
#> [4] "PRY_2018_EPH_V01_M_V04_A_PIP_PC-GPWG.dta"
#> [5] "PRY_2018_EPH_V01_M_V05_A_PIP_PC-GPWG.dta"
# load the data
df2 <- pip_find_data(
country = "PRY",
year = c(2017, 2018),
tool = "PC"
)
#> Warning: `pip_find_data()` was deprecated in pipload 0.1.13.
#> Please use `pip_find_dlw()` instead.
names(df2)
#> [1] "orig" "filename" "country_code" "surveyid_year"
#> [5] "survey_acronym" "vermast" "veralt" "collection"
#> [9] "module" "tool" "source"
Load auxiliary data
Load different types of auxiliary data
# Load CPI
df <- pip_load_aux("cpi")
#> v Most recent version of data loaded:
#> ''//w1wbgencifs01/pip/PIP-Data_QA/_aux/cpi/cpi'.'fst''
head(df)
#> country_code cpi_year survey_year cpi ccf survey_acronym
#> 1: AGO 2000 2000.21 0.033848061 1 HBS
#> 2: AGO 2008 2008.50 0.723337197 1 IBEP-MICS
#> 3: AGO 2018 2018.17 2.934414036 1 IDREA
#> 4: ALB 1996 1996.00 0.444572480 1 EWS
#> 5: ALB 2002 2002.00 0.780533048 1 LSMS
#> 6: ALB 2005 2005.00 0.838737128 1 LSMS
#> change_cpi2011 cpi2011 cpi_domain cpi_domain_value cpi2011_unadj
#> 1: 0 0.033848061 1 1 0.033848061
#> 2: 1 0.723337197 1 1 0.723337197
#> 3: 1 2.934414036 1 1 2.934414036
#> 4: 1 0.444572480 1 1 0.444572480
#> 5: 1 0.780533048 1 1 0.780533048
#> 6: 1 0.838737128 1 1 0.838737128
#> cpi_final_2019 cpi_data_level cpi2011_SM21 cpi2011_unadj_SM21 cpi2005_SM21
#> 1: NA national 0.03385145 0.03385145 0.071889997
#> 2: NA national 0.72328920 0.72328920 1.528669953
#> 3: NA national 2.93543023 2.93543023 NA
#> 4: NA national 0.44446184 0.44446184 0.530049980
#> 5: NA national 0.78033877 0.78033877 0.950504005
#> 6: NA national 0.83852839 0.83852839 1.000000000
#> cpi2017 cpi_id
#> 1: 0.014199691 CPI_v06_M_v01_A
#> 2: 0.303449123 CPI_v06_M_v01_A
#> 3: 1.231023884 CPI_v06_M_v01_A
#> 4: 0.399635267 CPI_v06_M_v01_A
#> 5: 0.701637071 CPI_v06_M_v01_A
#> 6: 0.753957905 CPI_v06_M_v01_A
# Load PPP
df <- pip_load_aux("ppp")
#> v Most recent version of data loaded:
#> ''//w1wbgencifs01/pip/PIP-Data_QA/_aux/ppp/ppp'.'fst''
head(df)
#> country_code ppp_year release_version adaptation_version ppp
#> 1: ABW 2005 v1 v1 NA
#> 2: ABW 2011 v1 v1 1.6527510
#> 3: ABW 2011 v1 v2 1.6527513
#> 4: ABW 2011 v2 v1 1.6377631
#> 5: ABW 2011 v2 v2 1.6377631
#> 6: ABW 2017 v1 v1 1.4804807
#> ppp_default ppp_default_by_year ppp_domain ppp_data_level
#> 1: FALSE TRUE 1 national
#> 2: FALSE FALSE 1 national
#> 3: FALSE FALSE 1 national
#> 4: FALSE FALSE 1 national
#> 5: TRUE TRUE 1 national
#> 6: FALSE TRUE 1 national
# Load GDP
df <- pip_load_aux("gdp")
#> v Most recent version of data loaded:
#> ''//w1wbgencifs01/pip/PIP-Data_QA/_aux/gdp/gdp'.'fst''
head(df)
#> country_code year gdp gdp_data_level gdp_domain
#> 1: ABW 1986 17231.380 national national
#> 2: ABW 1987 20262.945 national national
#> 3: ABW 1988 24343.255 national national
#> 4: ABW 1989 27313.495 national national
#> 5: ABW 1990 27884.253 national national
#> 6: ABW 1991 28953.525 national national
measure <- "cpi"
# see versions available
df <- pip_load_aux(measure, version = "available")
#> Versions available for cpi
#> [1] "2022-02-25 14:44:35 EST" "2022-02-23 15:44:16 EST"
#> [3] "2022-01-25 18:08:08 EST" "2022-01-07 14:38:08 EST"
#> [5] "2021-04-16 13:04:48 EDT" "2021-03-29 16:34:08 EDT"
#> [7] "2021-03-03 07:26:44 EST" "2021-02-24 09:44:48 EST"
#> [9] "2021-02-02 06:46:39 EST" "2021-02-01 13:09:02 EST"
#> [11] "2021-01-29 09:23:20 EST" "2021-01-22 06:47:38 EST"
#> [13] "2020-12-23 13:00:06 EST"
df
#> [1] "20220225144435" "20220223154416" "20220125180808" "20220107143808"
#> [5] "20210416130448" "20210329163408" "20210303072644" "20210224094448"
#> [9] "20210202064639" "20210201130902" "20210129092320" "20210122064738"
#> [13] "20201223130006"
# Load version of "2022-02-23 15:44:16 EST"
df <- pip_load_aux(measure, version = "20220223154416")
#> v Version of data loaded: 2022-02-23 15:44:16:
#> '//w1wbgencifs01/pip/PIP-Data_QA/_aux/cpi/_vintage/cpi_20220223154416.fst'
#> i Labels not applied to versioning data
head(df)
#> country_code cpi_year survey_year cpi ccf survey_acronym
#> 1: AGO 2000 2000.21 0.033848061 1 HBS
#> 2: AGO 2008 2008.50 0.723337197 1 IBEP-MICS
#> 3: AGO 2018 2018.17 2.934414036 1 IDREA
#> 4: ALB 1996 1996.00 0.444572480 1 EWS
#> 5: ALB 2002 2002.00 0.780533048 1 LSMS
#> 6: ALB 2005 2005.00 0.838737128 1 LSMS
#> change_cpi2011 cpi2011 cpi_domain cpi_domain_value cpi2011_unadj
#> 1: 0 0.033848061 1 1 0.033848061
#> 2: 1 0.723337197 1 1 0.723337197
#> 3: 1 2.934414036 1 1 2.934414036
#> 4: 1 0.444572480 1 1 0.444572480
#> 5: 1 0.780533048 1 1 0.780533048
#> 6: 1 0.838737128 1 1 0.838737128
#> cpi_final_2019 cpi_data_level cpi2011_SM21 cpi2011_unadj_SM21 cpi2005_SM21
#> 1: NA national 0.03385145 0.03385145 0.071889997
#> 2: NA national 0.72328920 0.72328920 1.528669953
#> 3: NA national 2.93543023 2.93543023 NA
#> 4: NA national 0.44446184 0.44446184 0.530049980
#> 5: NA national 0.78033877 0.78033877 0.950504005
#> 6: NA national 0.83852839 0.83852839 1.000000000
#> cpi2017 cpi_id
#> 1: 0.014199691 CPI_v06_M_v01_A
#> 2: 0.303449123 CPI_v06_M_v01_A
#> 3: 1.231023884 CPI_v06_M_v01_A
#> 4: 0.399635267 CPI_v06_M_v01_A
#> 5: 0.701637071 CPI_v06_M_v01_A
#> 6: 0.753957905 CPI_v06_M_v01_A
# Load one version before current one (i.e., load previous version)
df <- pip_load_aux(measure, version = -1)
#> v Version of data loaded: 2022-02-23 15:44:16:
#> '//w1wbgencifs01/pip/PIP-Data_QA/_aux/cpi/_vintage/cpi_20220223154416.fst'
#> i Labels not applied to versioning data
head(df)
#> country_code cpi_year survey_year cpi ccf survey_acronym
#> 1: AGO 2000 2000.21 0.033848061 1 HBS
#> 2: AGO 2008 2008.50 0.723337197 1 IBEP-MICS
#> 3: AGO 2018 2018.17 2.934414036 1 IDREA
#> 4: ALB 1996 1996.00 0.444572480 1 EWS
#> 5: ALB 2002 2002.00 0.780533048 1 LSMS
#> 6: ALB 2005 2005.00 0.838737128 1 LSMS
#> change_cpi2011 cpi2011 cpi_domain cpi_domain_value cpi2011_unadj
#> 1: 0 0.033848061 1 1 0.033848061
#> 2: 1 0.723337197 1 1 0.723337197
#> 3: 1 2.934414036 1 1 2.934414036
#> 4: 1 0.444572480 1 1 0.444572480
#> 5: 1 0.780533048 1 1 0.780533048
#> 6: 1 0.838737128 1 1 0.838737128
#> cpi_final_2019 cpi_data_level cpi2011_SM21 cpi2011_unadj_SM21 cpi2005_SM21
#> 1: NA national 0.03385145 0.03385145 0.071889997
#> 2: NA national 0.72328920 0.72328920 1.528669953
#> 3: NA national 2.93543023 2.93543023 NA
#> 4: NA national 0.44446184 0.44446184 0.530049980
#> 5: NA national 0.78033877 0.78033877 0.950504005
#> 6: NA national 0.83852839 0.83852839 1.000000000
#> cpi2017 cpi_id
#> 1: 0.014199691 CPI_v06_M_v01_A
#> 2: 0.303449123 CPI_v06_M_v01_A
#> 3: 1.231023884 CPI_v06_M_v01_A
#> 4: 0.399635267 CPI_v06_M_v01_A
#> 5: 0.701637071 CPI_v06_M_v01_A
#> 6: 0.753957905 CPI_v06_M_v01_A
Inventory of microdata
Check if the inventory is up to date and update it
# Update inventory of PRY
pip_inventory("update", country = "PRY")
#> i reading PIP directory
#> v reading PIP directory [3.2s]
#>
#> i file 'inventory.fst' is up to date.
#> No update performed
# Load inventory
df <- pip_inventory()
df$filename[1:5]
#> [1] "AGO_2000_HBS_V01_M_V01_A_PIP_PC-GPWG.dta"
#> [2] "AGO_2008_IBEP-MICS_V02_M_V02_A_PIP_PC-GPWG.dta"
#> [3] "AGO_2008_IBEP-MICS_V02_M_V02_A_PIP_TB-ALL.dta"
#> [4] "AGO_2018_IDREA_V01_M_V01_A_PIP_PC-GPWG.dta"
#> [5] "AGO_2018_IDREA_V01_M_V01_A_PIP_TB-ALL.dta"