Country Profiles¶

`country_profiles` ¶

Country profile download and key indicator functions.

`get_cp(country='all', povline=2.15, version=None, ppp_version=2017, release_version=None, api_version=API_VERSION, fmt='arrow', simplify=True, server=None, dataframe_type='pandas')` ¶

Download country profile data.

Mirrors pipr::get_cp().

Parameters:

Name	Type	Description	Default
`country`	`str \| list[str]`	ISO3 country code(s) or `"all"`.	`'all'`
`povline`	`float \| None`	Poverty line in 2017 PPP USD per day (default 2.15). When `ppp_version=2011` and povline is `None`, defaults to 1.9.	`2.15`
`version`	`str \| None`	Data version string.	`None`
`ppp_version`	`int`	PPP base year (default 2017).	`2017`
`release_version`	`str \| None`	Release date in `YYYYMMDD` format.	`None`
`api_version`	`str`	API version.	`API_VERSION`
`fmt`	`str`	Response format — `"arrow"` (default), `"json"`, or `"csv"`.	`'arrow'`
`simplify`	`bool`	If `True` (default), return a DataFrame.	`True`
`server`	`str \| None`	Server target — `None`/`"prod"`, `"qa"`, or `"dev"`.	`None`
`dataframe_type`	`Literal['pandas', 'polars']`	`"pandas"` (default) or `"polars"`.	`'pandas'`

Returns:

Type	Description
`DataFrame \| PIPResponse`	A DataFrame of country profile data.

Example

import povineq
df = povineq.get_cp(country="AGO")
df_all = povineq.get_cp()

Source code in src/povineq/country_profiles.py

def get_cp(
    country: str | list[str] = "all",
    povline: float | None = 2.15,
    version: str | None = None,
    ppp_version: int = 2017,
    release_version: str | None = None,
    api_version: str = API_VERSION,
    fmt: str = "arrow",
    simplify: bool = True,
    server: str | None = None,
    dataframe_type: Literal["pandas", "polars"] = "pandas",
) -> pd.DataFrame | PIPResponse:
    """Fetch country profile data from the country-profile download endpoint.

    Python counterpart of ``pipr::get_cp()``. The arguments are validated
    through :class:`CpParams`, sent to ``ENDPOINT_CP_DOWNLOAD`` and the
    response is handed to ``parse_response``.

    Args:
        country: One ISO3 country code, a list of them, or ``"all"``.
        povline: Poverty line in 2017 PPP USD per day (default 2.15).
            When ``ppp_version=2011`` and *povline* is ``None``,
            defaults to 1.9.
        version: Data version string.
        ppp_version: PPP base year (default 2017).
        release_version: Release date in ``YYYYMMDD`` format.
        api_version: API version; passed to the request builder rather than
            being sent as a query parameter.
        fmt: Response format — ``"arrow"`` (default), ``"json"``,
            or ``"csv"``.
        simplify: If ``True`` (default), return a DataFrame.
        server: Server target — ``None``/``"prod"``, ``"qa"``, or ``"dev"``.
        dataframe_type: ``"pandas"`` (default) or ``"polars"``.

    Returns:
        Whatever ``parse_response`` produces: a DataFrame of country profile
        data when *simplify* is ``True`` (presumably a ``PIPResponse``
        otherwise, per the return annotation).

    Example:
        >>> import povineq
        >>> df = povineq.get_cp(country="AGO")
        >>> df_all = povineq.get_cp()
    """
    logger.debug("get_cp", country=country, povline=povline, ppp_version=ppp_version)

    request_args = CpParams(
        country=country,
        povline=povline,
        version=version,
        ppp_version=ppp_version,
        release_version=release_version,
        api_version=api_version,
        format=fmt,
    ).to_query_params()
    # api_version travels as its own argument to build_and_execute below,
    # so it must not also appear among the query parameters.
    request_args.pop("api_version", None)

    raw_response = build_and_execute(
        ENDPOINT_CP_DOWNLOAD,
        request_args,
        server=server,
        api_version=api_version,
    )
    return parse_response(
        raw_response,
        simplify=simplify,
        dataframe_type=dataframe_type,
    )

`get_cp_ki(country, povline=2.15, version=None, ppp_version=2017, release_version=None, api_version=API_VERSION, simplify=True, server=None, dataframe_type='pandas')` ¶

Get country profile key indicators.

Mirrors pipr::get_cp_ki(). When simplify is True, calls `unnest_ki` to flatten the nested response.

Parameters:

Name	Type	Description	Default
`country`	`str`	Single ISO3 country code (required).	required
`povline`	`float \| None`	Poverty line in 2017 PPP USD per day (default 2.15). When `ppp_version=2011` and povline is `None`, defaults to 1.9.	`2.15`
`version`	`str \| None`	Data version string.	`None`
`ppp_version`	`int`	PPP base year (default 2017).	`2017`
`release_version`	`str \| None`	Release date in `YYYYMMDD` format.	`None`
`api_version`	`str`	API version.	`API_VERSION`
`simplify`	`bool`	If `True` (default), return a flat DataFrame via `unnest_ki`.	`True`
`server`	`str \| None`	Server target.	`None`
`dataframe_type`	`Literal['pandas', 'polars']`	`"pandas"` (default) or `"polars"`.	`'pandas'`

Returns:

Type	Description
`DataFrame \| PIPResponse`	A flat DataFrame of key indicators when simplify is `True`, or a `povineq._response.PIPResponse` when simplify is `False`.

Raises:

Type	Description
`PIPValidationError`	If country is missing or is a list.

Example

import povineq
df = povineq.get_cp_ki(country="IDN")

Source code in src/povineq/country_profiles.py

def get_cp_ki(
    country: str,
    povline: float | None = 2.15,
    version: str | None = None,
    ppp_version: int = 2017,
    release_version: str | None = None,
    api_version: str = API_VERSION,
    simplify: bool = True,
    server: str | None = None,
    dataframe_type: Literal["pandas", "polars"] = "pandas",
) -> pd.DataFrame | PIPResponse:
    """Fetch the key indicators of a single country profile.

    Python counterpart of ``pipr::get_cp_ki()``. With *simplify* set, the
    JSON body of the response is decoded and flattened by
    :func:`unnest_ki`; otherwise the response goes through
    ``parse_response`` with ``simplify=False``.

    Args:
        country: Single ISO3 country code (required).
        povline: Poverty line in 2017 PPP USD per day (default 2.15).
            When ``ppp_version=2011`` and *povline* is ``None``,
            defaults to 1.9.
        version: Data version string.
        ppp_version: PPP base year (default 2017).
        release_version: Release date in ``YYYYMMDD`` format.
        api_version: API version; passed to the request builder rather than
            being sent as a query parameter.
        simplify: If ``True`` (default), return a flat DataFrame via
            :func:`unnest_ki`.
        server: Server target.
        dataframe_type: ``"pandas"`` (default) or ``"polars"``. Only
            forwarded to ``parse_response``, i.e. on the
            ``simplify=False`` path; the ``simplify=True`` path returns
            the output of :func:`unnest_ki` unchanged.

    Returns:
        A flat DataFrame of key indicators when *simplify* is ``True``, or a
        :class:`~povineq._response.PIPResponse` when *simplify* is ``False``.

    Raises:
        PIPValidationError: If *country* is missing or is a list.

    Example:
        >>> import povineq
        >>> df = povineq.get_cp_ki(country="IDN")
    """
    logger.debug("get_cp_ki", country=country, povline=povline)

    request_args = CpKiParams(
        country=country,
        povline=povline,
        version=version,
        ppp_version=ppp_version,
        release_version=release_version,
        api_version=api_version,
    ).to_query_params()
    # api_version travels as its own argument to build_and_execute below,
    # so it must not also appear among the query parameters.
    request_args.pop("api_version", None)

    raw_response = build_and_execute(
        ENDPOINT_CP_KEY_INDICATORS,
        request_args,
        server=server,
        api_version=api_version,
    )

    # The cp-key-indicators endpoint returns JSON only.  Callers that want
    # the unflattened payload get it wrapped by parse_response; everyone
    # else gets the decoded body flattened by unnest_ki.
    if not simplify:
        return parse_response(
            raw_response,
            simplify=False,
            dataframe_type=dataframe_type,
        )

    payload = json.loads(raw_response.text)
    return unnest_ki(payload)

`unnest_ki(raw)` ¶

Flatten nested key-indicator response into a single DataFrame.

Mirrors pipr::unnest_ki(). Extracts headcount, population, GNI, GDP growth, MPM headcount, and shared prosperity tables from the nested JSON structure and merges them on (country_code, reporting_year).

Parameters:

Name	Type	Description	Default
`raw`	`dict \| list`	Parsed JSON from the `cp-key-indicators` endpoint — either a dict (single country) or a list containing one dict.	required

Returns:

Type	Description
`DataFrame`	A flat `pandas.DataFrame` with one row per `(country_code, reporting_year)` combination.

Example

import povineq
df = povineq.get_cp_ki(country="IDN")  # calls unnest_ki internally

Source code in src/povineq/country_profiles.py

def unnest_ki(raw: dict | list) -> pd.DataFrame:
    """Flatten nested key-indicator response into a single DataFrame.

    Mirrors ``pipr::unnest_ki()``. Extracts headcount, population, GNI,
    GDP growth, MPM headcount, and shared prosperity tables from the nested
    JSON structure and merges them on ``(country_code, reporting_year)``.

    Args:
        raw: Parsed JSON from the ``cp-key-indicators`` endpoint — either a
            dict (single country) or a list containing one dict.

    Returns:
        A flat :class:`~pandas.DataFrame` with one row per
        ``(country_code, reporting_year)`` combination.

    Example:
        >>> import povineq
        >>> df = povineq.get_cp_ki(country="IDN")  # calls unnest_ki internally
    """
    if isinstance(raw, list):
        raw = raw[0] if raw else {}

    if not raw:
        logger.warning("unnest_ki: received an empty response. Returning an empty DataFrame.")
        return pd.DataFrame()

    def _extract(key: str) -> pd.DataFrame:
        val = raw.get(key)
        if val is None:
            return pd.DataFrame()
        if isinstance(val, list) and len(val) == 1 and not isinstance(val[0], dict):
            # Wrapped list-of-lists
            inner = val[0]
            if isinstance(inner, list):
                return pd.DataFrame(inner) if inner else pd.DataFrame()
        if isinstance(val, list):
            # Could be list-of-dicts directly or list containing a list-of-dicts
            if val and isinstance(val[0], dict):
                return pd.DataFrame(val)
            if val and isinstance(val[0], list):
                return pd.DataFrame(val[0]) if val[0] else pd.DataFrame()
        if isinstance(val, dict):
            return pd.DataFrame([val])
        return pd.DataFrame()

    headcount = _extract("headcount")
    headcount_national = _extract("headcount_national")
    mpm_headcount = _extract("mpm_headcount")
    pop = _extract("pop")
    gni = _extract("gni")
    gdp_growth = _extract("gdp_growth")
    shared_prosperity = _extract("shared_prosperity")

    # Deduplicate GNI and GDP growth on key columns (pipr behaviour)
    merge_cols = ["country_code", "reporting_year"]
    gni = gni.drop_duplicates(subset=merge_cols) if not gni.empty and all(c in gni.columns for c in merge_cols) else gni
    gdp_growth = gdp_growth.drop_duplicates(subset=merge_cols) if not gdp_growth.empty and all(c in gdp_growth.columns for c in merge_cols) else gdp_growth

    # Merge all on (country_code, reporting_year) with full outer joins
    # Merge all sub-tables on (country_code, reporting_year) using outer joins.
    # Start with an empty accumulator; grow it left-to-right so later tables
    # extend rather than overwrite columns from earlier ones.
    dfs = [headcount, headcount_national, mpm_headcount, pop, gni, gdp_growth]
    result = pd.DataFrame()
    for df_part in dfs:
        if df_part.empty:
            continue
        if result.empty:
            result = df_part
        else:
            common = [c for c in merge_cols if c in result.columns and c in df_part.columns]
            if common:
                # Standard case: join on shared key columns.
                result = result.merge(df_part, on=common, how="outer")
            else:
                # No common key columns — a cross join would create a Cartesian
                # product of all rows, silently inflating the output.  Warn the
                # caller so the issue is visible in logs.
                logger.warning(
                    "unnest_ki: no common merge keys found when joining "
                    f"a sub-table with columns {list(df_part.columns)}. "
                    "Performing a cross join, which may produce spurious rows."
                )
                result = result.merge(df_part, how="cross")

    # Append shared_prosperity (merges only on country_code)
    if not shared_prosperity.empty and not result.empty:
        cc_col = "country_code"
        if cc_col in result.columns and cc_col in shared_prosperity.columns:
            result = result.merge(shared_prosperity, on=cc_col, how="outer")

    return result

Country Profiles¶