Skip to content

povineq (top-level)

povineq

povineq — Python wrapper for the World Bank PIP API.

Usage::

import povineq

# Core statistics
df = povineq.get_stats(country="AGO", year=2000)
df = povineq.get_wb()
df = povineq.get_agg(aggregate="fcv")

# Country profiles
df = povineq.get_cp(country="AGO")
df = povineq.get_cp_ki(country="IDN")

# Auxiliary tables
tables = povineq.get_aux()
df = povineq.get_aux("gdp")
df = povineq.get_countries()

# Info & cache
status = povineq.check_api()
info = povineq.get_pip_info()
df_versions = povineq.get_versions()
povineq.delete_cache()
cache = povineq.get_cache_info()

PIPAPIError(status_code, error_message='', details='', valid_values='')

Bases: PIPError

API returned a structured error response (4xx/5xx).

Attributes:

Name Type Description
status_code

HTTP status code of the response.

error_message

Short error description from the API.

details

Additional detail message, if provided.

valid_values

Comma-separated valid values, if provided.

Source code in src/povineq/_errors.py
def __init__(
    self,
    status_code: int,
    error_message: str = "",
    details: str = "",
    valid_values: str = "",
) -> None:
    """Record the structured error fields and build the exception message.

    Args:
        status_code: HTTP status code of the response.
        error_message: Short error description from the API.
        details: Additional detail message, if provided.
        valid_values: Comma-separated valid values, if provided.
    """
    self.status_code = status_code
    self.error_message = error_message
    self.details = details
    self.valid_values = valid_values
    # Falsy fields are skipped so the message holds no dangling " | " separators.
    summary = " | ".join(filter(None, (error_message, details, valid_values)))
    super().__init__(f"HTTP {status_code}: {summary}")

PIPConnectionError

Bases: PIPError

Network connectivity issue — cannot reach the API.

PIPError

Bases: Exception

Base exception for all PIP API errors.

PIPRateLimitError(retry_after_seconds=0)

Bases: PIPError

API rate limit exceeded (HTTP 429).

Attributes:

Name Type Description
retry_after_seconds

Suggested wait time in seconds before retrying.

Source code in src/povineq/_errors.py
def __init__(self, retry_after_seconds: float = 0) -> None:
    """Store the suggested wait time and build the exception message.

    Args:
        retry_after_seconds: Suggested wait time in seconds before retrying.
            The message renders it with no decimal places.
    """
    self.retry_after_seconds = retry_after_seconds
    message = f"Rate limit exceeded. Retry after {retry_after_seconds:.0f} seconds."
    super().__init__(message)

PIPValidationError

Bases: PIPError

Invalid parameter values detected before the API call.

PIPResponse(url, status, content_type, content, response) dataclass

Full API response object returned when simplify=False.

Equivalent to pipr's pip_api S3 class.

Attributes:

Name Type Description
url str

The request URL as a string.

status int

HTTP status code.

content_type str

Content-Type header value.

content DataFrame

Parsed data as a pandas (or polars) DataFrame.

response Response

The underlying :class:httpx.Response.

delete_cache()

Delete all cached HTTP responses.

Removes the entire cache directory and re-creates an empty one so subsequent calls can start fresh.

Example

import povineq
povineq.delete_cache()

Source code in src/povineq/_cache.py
def delete_cache() -> None:
    """Wipe the HTTP response cache directory.

    When the cache directory has no entries, only an informational message is
    logged. Otherwise the directory tree is removed, re-created empty, and the
    number of top-level entries that were present is logged.

    Example:
        >>> import povineq
        >>> povineq.delete_cache()
    """
    directory = _cache_dir()
    entries = list(directory.iterdir())

    if entries:
        shutil.rmtree(directory)
        # _cache_dir may be memoised; drop the memo so the next call
        # recreates the directory entry.
        if hasattr(_cache_dir, "cache_clear"):
            _cache_dir.cache_clear()
        directory.mkdir(parents=True, exist_ok=True)
        logger.info("All {} cached item(s) have been deleted.", len(entries))
    else:
        logger.info("Cache is empty. Nothing to delete.")

get_cache_info()

Return statistics about the current HTTP response cache.

Returns:

Type Description
dict[str, object]

A dictionary with keys:

  • "path": absolute path to the cache directory.
  • "n_files": number of cached response files.
  • "total_bytes": total size of the cache in bytes.
Example

import povineq
info = povineq.get_cache_info()
print(info["n_files"])

Source code in src/povineq/_cache.py
def get_cache_info() -> dict[str, object]:
    """Summarise the contents of the HTTP response cache directory.

    The cache directory is walked recursively; only regular files are counted.

    Returns:
        A dictionary with keys:

        - ``"path"``: the cache directory path, as a string.
        - ``"n_files"``: number of files found under the cache directory.
        - ``"total_bytes"``: combined size of those files in bytes.

    Example:
        >>> import povineq
        >>> info = povineq.get_cache_info()
        >>> print(info["n_files"])
    """
    directory = _cache_dir()
    # One size per regular file; sub-directories are skipped.
    sizes = [entry.stat().st_size for entry in directory.rglob("*") if entry.is_file()]
    return {
        "path": str(directory),
        "n_files": len(sizes),
        "total_bytes": sum(sizes),
    }

call_aux(table=None)

Retrieve a previously stored auxiliary table from memory.

Mirrors pipr::call_aux().

Parameters:

Name Type Description Default
table str | None

Table name to retrieve. If None, lists all stored tables.

None

Returns:

Type Description
DataFrame | list[str]

The stored DataFrame, or a list of stored table names.

Raises:

Type Description
KeyError

If the requested table is not in the store.

Example

import povineq
povineq.get_aux("gdp", assign_tb=True)
df = povineq.call_aux("gdp")

Source code in src/povineq/auxiliary.py
def call_aux(table: str | None = None) -> pd.DataFrame | list[str]:
    """Look up an auxiliary table that was previously stored in memory.

    Public counterpart of ``pipr::call_aux()``; the lookup itself is delegated
    to the internal store helper.

    Args:
        table: Name of the stored table. When ``None``, the names of all
            stored tables are listed instead.

    Returns:
        The stored DataFrame, or a list of stored table names.

    Raises:
        KeyError: If the requested table is not in the store (raised by the
            underlying store helper).

    Example:
        >>> import povineq
        >>> povineq.get_aux("gdp", assign_tb=True)
        >>> df = povineq.call_aux("gdp")
    """
    stored = _call_aux_store(table)
    return stored

display_aux(version=None, ppp_version=None, release_version=None, api_version=API_VERSION, fmt='json', simplify=True, server=None)

Display available auxiliary tables.

Fetches the list of auxiliary tables and prints them. Mirrors pipr::display_aux().

Parameters:

Name Type Description Default
version str | None

Data version string.

None
ppp_version int | None

PPP base year.

None
release_version str | None

Release date in YYYYMMDD format.

None
api_version str

API version.

API_VERSION
fmt str

Response format.

'json'
simplify bool

Passed to :func:get_aux.

True
server str | None

Server target.

None

Returns:

Type Description
DataFrame | list[str]

List of available table name strings.

Example

import povineq
povineq.display_aux()

Source code in src/povineq/auxiliary.py
def display_aux(
    version: str | None = None,
    ppp_version: int | None = None,
    release_version: str | None = None,
    api_version: str = API_VERSION,
    fmt: str = "json",
    simplify: bool = True,
    server: str | None = None,
) -> pd.DataFrame | list[str]:
    """List the auxiliary tables offered by the API.

    Calls :func:`get_aux` with ``table=None`` and logs the table names when
    the result is a list. Mirrors ``pipr::display_aux()``.

    Args:
        version: Data version string.
        ppp_version: PPP base year.
        release_version: Release date in ``YYYYMMDD`` format.
        api_version: API version.
        fmt: Response format.
        simplify: Passed to :func:`get_aux`.
        server: Server target.

    Returns:
        Whatever :func:`get_aux` returns for ``table=None`` — normally the
        list of available table name strings.

    Example:
        >>> import povineq
        >>> povineq.display_aux()
    """
    tables = get_aux(
        table=None,
        version=version,
        ppp_version=ppp_version,
        release_version=release_version,
        api_version=api_version,
        fmt=fmt,
        simplify=simplify,
        server=server,
    )

    # Only a plain list of names is logged; any other result is passed through.
    if isinstance(tables, list):
        logger.info("Available auxiliary tables", tables=tables)

    return tables  # type: ignore[return-value]

get_aux(table=None, version=None, ppp_version=None, release_version=None, api_version=API_VERSION, fmt='json', simplify=True, server=None, dataframe_type='pandas', assign_tb=False, replace=False)

Fetch an auxiliary dataset from the PIP API.

When no table is specified, returns a list of available table names. Mirrors pipr::get_aux().

Parameters:

Name Type Description Default
table str | None

Auxiliary table name (e.g. "gdp", "cpi"). If None, returns a list of available table names.

None
version str | None

Data version string.

None
ppp_version int | None

PPP base year.

None
release_version str | None

Release date in YYYYMMDD format.

None
api_version str

API version.

API_VERSION
fmt str

Response format — "json" (default) or "csv". Arrow is not supported for auxiliary tables.

'json'
simplify bool

If True (default), return a DataFrame.

True
server str | None

Server target.

None
dataframe_type Literal['pandas', 'polars']

"pandas" (default) or "polars".

'pandas'
assign_tb bool | str

If False (default), return data normally. If True, store the table in memory under its own name. If a string, store it under that name.

False
replace bool

If True, overwrite existing in-memory tables.

False

Returns:

Type Description
DataFrame | list[str] | PIPResponse | bool
  • A list[str] of table names when table is None.
  • A :class:~pandas.DataFrame when simplify is True.
  • A :class:~povineq._response.PIPResponse when simplify is False.
  • True when assign_tb is set and the table was stored.
Example

import povineq
tables = povineq.get_aux()  # list of available tables
df = povineq.get_aux("gdp")  # fetch GDP table
povineq.get_aux("cpi", assign_tb=True)  # fetch and store in memory

Source code in src/povineq/auxiliary.py
def get_aux(
    table: str | None = None,
    version: str | None = None,
    ppp_version: int | None = None,
    release_version: str | None = None,
    api_version: str = API_VERSION,
    fmt: str = "json",
    simplify: bool = True,
    server: str | None = None,
    dataframe_type: Literal["pandas", "polars"] = "pandas",
    assign_tb: bool | str = False,
    replace: bool = False,
) -> pd.DataFrame | list[str] | PIPResponse | bool:
    """Fetch an auxiliary dataset from the PIP API.

    When no *table* is specified, returns a list of available table names.
    Mirrors ``pipr::get_aux()``.

    Args:
        table: Auxiliary table name (e.g. ``"gdp"``, ``"cpi"``). If ``None``,
            returns a list of available table names.
        version: Data version string.
        ppp_version: PPP base year. Not forwarded when *table* is ``None``;
            the table listing request only carries *version* and
            *release_version*.
        release_version: Release date in ``YYYYMMDD`` format.
        api_version: API version.
        fmt: Response format — ``"json"`` (default) or ``"csv"``.
            Arrow is not supported for auxiliary tables.
        simplify: If ``True`` (default), return a DataFrame.
        server: Server target.
        dataframe_type: ``"pandas"`` (default) or ``"polars"``.
        assign_tb: If ``False`` (default), return data normally. If ``True``,
            store the table in memory under its own name. If a string, store
            it under that name. Ignored when *table* is ``None``.
        replace: If ``True``, overwrite existing in-memory tables.

    Returns:
        - A ``list[str]`` of table names when *table* is ``None``.
        - A :class:`~pandas.DataFrame` when *simplify* is ``True``.
        - A :class:`~povineq._response.PIPResponse` when *simplify* is ``False``.
        - The result of :func:`set_aux` (documented as ``True``) when
          *assign_tb* is set and the table was stored.

    Raises:
        ValueError: If *assign_tb* is neither a bool nor a string. This is
            checked before any request is sent.

    Example:
        >>> import povineq
        >>> tables = povineq.get_aux()          # list of available tables
        >>> df = povineq.get_aux("gdp")         # fetch GDP table
        >>> povineq.get_aux("cpi", assign_tb=True)  # fetch and store in memory
    """
    # Built up front in both modes so parameter validation runs before any request.
    params = AuxParams(
        table=table,
        version=version,
        ppp_version=ppp_version,
        release_version=release_version,
        api_version=api_version,
        format=fmt,
    )

    if table is None:
        # Return list of available tables
        query: dict[str, str] = {}
        if version is not None:
            query["version"] = version
        if release_version is not None:
            query["release_version"] = release_version

        response = build_and_execute(ENDPOINT_AUX, query, server=server, api_version=api_version)
        result = parse_response(response, simplify=simplify, dataframe_type=dataframe_type)

        if simplify and isinstance(result, pd.DataFrame) and "tables" in result.columns:
            # pd.json_normalize packs {"tables": [...]} into a single-row df;
            # the cell value is the list itself — unwrap it when needed.
            raw = result["tables"].iloc[0]
            tables_list: list[str] = raw if isinstance(raw, list) else result["tables"].tolist()
            logger.info("Available auxiliary tables", tables=tables_list)
            return tables_list

        return result  # type: ignore[return-value]

    # Resolve the in-memory storage name *before* the request so an invalid
    # ``assign_tb`` fails fast instead of after a wasted network round-trip.
    tb_name: str | None = None
    if assign_tb is True:
        tb_name = table
    elif isinstance(assign_tb, str):
        tb_name = assign_tb
    elif assign_tb is not False:
        raise ValueError("assign_tb must be a bool or a string.")

    # Fetch specific table
    query = params.to_query_params()
    # api_version is passed to build_and_execute separately, not as a query param.
    query.pop("api_version", None)

    response = build_and_execute(ENDPOINT_AUX, query, server=server, api_version=api_version)
    rt = parse_response(response, simplify=simplify, dataframe_type=dataframe_type)

    if tb_name is not None:
        if isinstance(rt, pd.DataFrame):
            return set_aux(tb_name, rt, replace=replace)
        # Only pandas DataFrames can be stored; fall through and return the
        # parsed result unchanged.
        logger.warning(
            "assign_tb requires simplify=True to store the table in memory; "
            "got a PIPResponse object. The table was NOT stored."
        )

    return rt  # type: ignore[return-value]

get_cp(country='all', povline=2.15, version=None, ppp_version=2017, release_version=None, api_version=API_VERSION, fmt='arrow', simplify=True, server=None, dataframe_type='pandas')

Download country profile data.

Mirrors pipr::get_cp().

Parameters:

Name Type Description Default
country str | list[str]

ISO3 country code(s) or "all".

'all'
povline float | None

Poverty line in 2017 PPP USD per day (default 2.15). When ppp_version=2011 and povline is None, defaults to 1.9.

2.15
version str | None

Data version string.

None
ppp_version int

PPP base year (default 2017).

2017
release_version str | None

Release date in YYYYMMDD format.

None
api_version str

API version.

API_VERSION
fmt str

Response format — "arrow" (default), "json", or "csv".

'arrow'
simplify bool

If True (default), return a DataFrame.

True
server str | None

Server target — None/"prod", "qa", or "dev".

None
dataframe_type Literal['pandas', 'polars']

"pandas" (default) or "polars".

'pandas'

Returns:

Type Description
DataFrame | PIPResponse

A DataFrame of country profile data.

Example

import povineq
df = povineq.get_cp(country="AGO")
df_all = povineq.get_cp()

Source code in src/povineq/country_profiles.py
def get_cp(
    country: str | list[str] = "all",
    povline: float | None = 2.15,
    version: str | None = None,
    ppp_version: int = 2017,
    release_version: str | None = None,
    api_version: str = API_VERSION,
    fmt: str = "arrow",
    simplify: bool = True,
    server: str | None = None,
    dataframe_type: Literal["pandas", "polars"] = "pandas",
) -> pd.DataFrame | PIPResponse:
    """Fetch country profile data from the download endpoint.

    Python counterpart of ``pipr::get_cp()``.

    Args:
        country: ISO3 country code(s) or ``"all"``.
        povline: Poverty line (default 2.15), forwarded to ``CpParams``.
            NOTE(review): the published docs say that with
            ``ppp_version=2011`` and *povline* ``None`` the line defaults to
            1.9; that logic is not visible in this function — confirm it
            lives in ``CpParams``.
        version: Data version string.
        ppp_version: PPP base year (default 2017).
        release_version: Release date in ``YYYYMMDD`` format.
        api_version: API version.
        fmt: Response format — ``"arrow"`` (default), ``"json"``,
            or ``"csv"``.
        simplify: If ``True`` (default), return a DataFrame.
        server: Server target — ``None``/``"prod"``, ``"qa"``, or ``"dev"``.
        dataframe_type: ``"pandas"`` (default) or ``"polars"``.

    Returns:
        The parsed response: a DataFrame of country profile data by default.

    Example:
        >>> import povineq
        >>> df = povineq.get_cp(country="AGO")
        >>> df_all = povineq.get_cp()
    """
    logger.debug("get_cp", country=country, povline=povline, ppp_version=ppp_version)

    query = CpParams(
        country=country,
        povline=povline,
        version=version,
        ppp_version=ppp_version,
        release_version=release_version,
        api_version=api_version,
        format=fmt,
    ).to_query_params()
    # api_version is handed to build_and_execute directly, not as a query param.
    query.pop("api_version", None)

    response = build_and_execute(
        ENDPOINT_CP_DOWNLOAD,
        query,
        server=server,
        api_version=api_version,
    )
    parsed = parse_response(response, simplify=simplify, dataframe_type=dataframe_type)
    return parsed

get_cp_ki(country, povline=2.15, version=None, ppp_version=2017, release_version=None, api_version=API_VERSION, simplify=True, server=None, dataframe_type='pandas')

Get country profile key indicators.

Mirrors pipr::get_cp_ki(). When simplify is True, calls :func:unnest_ki to flatten the nested response.

Parameters:

Name Type Description Default
country str

Single ISO3 country code (required).

required
povline float | None

Poverty line in 2017 PPP USD per day (default 2.15). When ppp_version=2011 and povline is None, defaults to 1.9.

2.15
version str | None

Data version string.

None
ppp_version int

PPP base year (default 2017).

2017
release_version str | None

Release date in YYYYMMDD format.

None
api_version str

API version.

API_VERSION
simplify bool

If True (default), return a flat DataFrame via :func:unnest_ki.

True
server str | None

Server target.

None
dataframe_type Literal['pandas', 'polars']

"pandas" (default) or "polars".

'pandas'

Returns:

Type Description
DataFrame | PIPResponse

A flat DataFrame of key indicators when simplify is True, or a :class:~povineq._response.PIPResponse when simplify is False.

Raises:

Type Description
PIPValidationError

If country is missing or is a list.

Example

import povineq
df = povineq.get_cp_ki(country="IDN")

Source code in src/povineq/country_profiles.py
def get_cp_ki(
    country: str,
    povline: float | None = 2.15,
    version: str | None = None,
    ppp_version: int = 2017,
    release_version: str | None = None,
    api_version: str = API_VERSION,
    simplify: bool = True,
    server: str | None = None,
    dataframe_type: Literal["pandas", "polars"] = "pandas",
) -> pd.DataFrame | PIPResponse:
    """Fetch the key indicators of a single country profile.

    Python counterpart of ``pipr::get_cp_ki()``. With *simplify* enabled the
    JSON body is decoded and flattened by :func:`unnest_ki`.

    Args:
        country: Single ISO3 country code (required).
        povline: Poverty line (default 2.15), forwarded to ``CpKiParams``.
            NOTE(review): the published docs say that with
            ``ppp_version=2011`` and *povline* ``None`` the line defaults to
            1.9; that logic is not visible in this function — confirm it
            lives in ``CpKiParams``.
        version: Data version string.
        ppp_version: PPP base year (default 2017).
        release_version: Release date in ``YYYYMMDD`` format.
        api_version: API version.
        simplify: If ``True`` (default), return a flat DataFrame via
            :func:`unnest_ki`.
        server: Server target.
        dataframe_type: ``"pandas"`` (default) or ``"polars"``. Only
            forwarded to ``parse_response``, i.e. when *simplify* is
            ``False``; the *simplify* path returns what :func:`unnest_ki`
            builds.

    Returns:
        A flat DataFrame of key indicators when *simplify* is ``True``, or a
        :class:`~povineq._response.PIPResponse` when *simplify* is ``False``.

    Raises:
        PIPValidationError: If *country* is missing or is a list
            (presumably raised by ``CpKiParams`` — not visible here).

    Example:
        >>> import povineq
        >>> df = povineq.get_cp_ki(country="IDN")
    """
    logger.debug("get_cp_ki", country=country, povline=povline)

    query = CpKiParams(
        country=country,
        povline=povline,
        version=version,
        ppp_version=ppp_version,
        release_version=release_version,
        api_version=api_version,
    ).to_query_params()
    # api_version is handed to build_and_execute directly, not as a query param.
    query.pop("api_version", None)

    response = build_and_execute(
        ENDPOINT_CP_KEY_INDICATORS,
        query,
        server=server,
        api_version=api_version,
    )

    # The endpoint answers with JSON only. The non-simplified path wraps the
    # response via parse_response; the simplified path decodes the body and
    # lets unnest_ki flatten the nested structure.
    if not simplify:
        return parse_response(response, simplify=False, dataframe_type=dataframe_type)

    return unnest_ki(json.loads(response.text))

unnest_ki(raw)

Flatten nested key-indicator response into a single DataFrame.

Mirrors pipr::unnest_ki(). Extracts headcount, population, GNI, GDP growth, MPM headcount, and shared prosperity tables from the nested JSON structure and merges them on (country_code, reporting_year).

Parameters:

Name Type Description Default
raw dict | list

Parsed JSON from the cp-key-indicators endpoint — either a dict (single country) or a list containing one dict.

required

Returns:

Type Description
DataFrame

A flat :class:~pandas.DataFrame with one row per (country_code, reporting_year) combination.

Example

import povineq
df = povineq.get_cp_ki(country="IDN")  # calls unnest_ki internally

Source code in src/povineq/country_profiles.py
def unnest_ki(raw: dict | list) -> pd.DataFrame:
    """Flatten nested key-indicator response into a single DataFrame.

    Mirrors ``pipr::unnest_ki()``. Extracts headcount, population, GNI,
    GDP growth, MPM headcount, and shared prosperity tables from the nested
    JSON structure and merges them on ``(country_code, reporting_year)``.

    Args:
        raw: Parsed JSON from the ``cp-key-indicators`` endpoint — either a
            dict (single country) or a list containing one dict.

    Returns:
        A flat :class:`~pandas.DataFrame` with one row per
        ``(country_code, reporting_year)`` combination.

    Example:
        >>> import povineq
        >>> df = povineq.get_cp_ki(country="IDN")  # calls unnest_ki internally
    """
    if isinstance(raw, list):
        raw = raw[0] if raw else {}

    if not raw:
        logger.warning("unnest_ki: received an empty response. Returning an empty DataFrame.")
        return pd.DataFrame()

    def _extract(key: str) -> pd.DataFrame:
        val = raw.get(key)
        if val is None:
            return pd.DataFrame()
        if isinstance(val, list) and len(val) == 1 and not isinstance(val[0], dict):
            # Wrapped list-of-lists
            inner = val[0]
            if isinstance(inner, list):
                return pd.DataFrame(inner) if inner else pd.DataFrame()
        if isinstance(val, list):
            # Could be list-of-dicts directly or list containing a list-of-dicts
            if val and isinstance(val[0], dict):
                return pd.DataFrame(val)
            if val and isinstance(val[0], list):
                return pd.DataFrame(val[0]) if val[0] else pd.DataFrame()
        if isinstance(val, dict):
            return pd.DataFrame([val])
        return pd.DataFrame()

    headcount = _extract("headcount")
    headcount_national = _extract("headcount_national")
    mpm_headcount = _extract("mpm_headcount")
    pop = _extract("pop")
    gni = _extract("gni")
    gdp_growth = _extract("gdp_growth")
    shared_prosperity = _extract("shared_prosperity")

    # Deduplicate GNI and GDP growth on key columns (pipr behaviour)
    merge_cols = ["country_code", "reporting_year"]
    gni = gni.drop_duplicates(subset=merge_cols) if not gni.empty and all(c in gni.columns for c in merge_cols) else gni
    gdp_growth = gdp_growth.drop_duplicates(subset=merge_cols) if not gdp_growth.empty and all(c in gdp_growth.columns for c in merge_cols) else gdp_growth

    # Merge all on (country_code, reporting_year) with full outer joins
    # Merge all sub-tables on (country_code, reporting_year) using outer joins.
    # Start with an empty accumulator; grow it left-to-right so later tables
    # extend rather than overwrite columns from earlier ones.
    dfs = [headcount, headcount_national, mpm_headcount, pop, gni, gdp_growth]
    result = pd.DataFrame()
    for df_part in dfs:
        if df_part.empty:
            continue
        if result.empty:
            result = df_part
        else:
            common = [c for c in merge_cols if c in result.columns and c in df_part.columns]
            if common:
                # Standard case: join on shared key columns.
                result = result.merge(df_part, on=common, how="outer")
            else:
                # No common key columns — a cross join would create a Cartesian
                # product of all rows, silently inflating the output.  Warn the
                # caller so the issue is visible in logs.
                logger.warning(
                    "unnest_ki: no common merge keys found when joining "
                    f"a sub-table with columns {list(df_part.columns)}. "
                    "Performing a cross join, which may produce spurious rows."
                )
                result = result.merge(df_part, how="cross")

    # Append shared_prosperity (merges only on country_code)
    if not shared_prosperity.empty and not result.empty:
        cc_col = "country_code"
        if cc_col in result.columns and cc_col in shared_prosperity.columns:
            result = result.merge(shared_prosperity, on=cc_col, how="outer")

    return result

check_api(api_version=API_VERSION, server=None)

Test connectivity to the PIP API.

Hits the health-check endpoint and returns the parsed response. Mirrors pipr::check_api().

Parameters:

Name Type Description Default
api_version str

API version (only "v1" currently).

API_VERSION
server str | None

Server target — None/"prod", "qa", or "dev".

None

Returns:

Type Description
dict

A dict with the health-check response from the API.

Raises:

Type Description
PIPConnectionError

If the network is unreachable.

PIPAPIError

If the health-check endpoint returns an error.

Example

import povineq
status = povineq.check_api()

Source code in src/povineq/info.py
def check_api(
    api_version: str = API_VERSION,
    server: str | None = None,
) -> dict:
    """Probe the PIP API health-check endpoint.

    Python counterpart of ``pipr::check_api()``.

    Args:
        api_version: API version (only ``"v1"`` currently).
        server: Server target — ``None``/``"prod"``, ``"qa"``, or ``"dev"``.

    Returns:
        The parsed health-check payload when it is a dict; otherwise a
        one-key dict ``{"status": <HTTP status code>}``.

    Raises:
        PIPConnectionError: If the network is unreachable.
        PIPAPIError: If the health-check endpoint returns an error.

    Example:
        >>> import povineq
        >>> status = povineq.check_api()
    """
    logger.debug("check_api()")
    response = build_and_execute(
        ENDPOINT_HEALTH_CHECK,
        {},
        server=server,
        api_version=api_version,
    )
    payload = parse_response(response, simplify=False, is_raw=True)
    # A non-dict payload is replaced by the bare status code so callers
    # always receive a dict.
    return payload if isinstance(payload, dict) else {"status": response.status_code}

get_pip_info(api_version=API_VERSION, server=None)

Get metadata about the PIP API.

Mirrors pipr::get_pip_info().

Parameters:

Name Type Description Default
api_version str

API version (only "v1" currently).

API_VERSION
server str | None

Server target.

None

Returns:

Type Description
dict

A dict with API metadata (version, endpoints, etc.).

Example

import povineq
info = povineq.get_pip_info()

Source code in src/povineq/info.py
def get_pip_info(
    api_version: str = API_VERSION,
    server: str | None = None,
) -> dict:
    """Fetch the PIP API's metadata document.

    Python counterpart of ``pipr::get_pip_info()``.

    Args:
        api_version: API version (only ``"v1"`` currently).
        server: Server target.

    Returns:
        The parsed payload when it is a dict; an empty dict otherwise.

    Example:
        >>> import povineq
        >>> info = povineq.get_pip_info()
    """
    logger.debug("get_pip_info()")
    response = build_and_execute(
        ENDPOINT_PIP_INFO,
        {},
        server=server,
        api_version=api_version,
    )
    payload = parse_response(response, simplify=False, is_raw=True)
    # Anything that is not a dict is replaced by an empty dict.
    return payload if isinstance(payload, dict) else {}

get_versions(api_version=API_VERSION, server=None, simplify=True, dataframe_type='pandas')

List available data versions.

Mirrors pipr::get_versions().

Parameters:

Name Type Description Default
api_version str

API version (only "v1" currently).

API_VERSION
server str | None

Server target.

None
simplify bool

If True (default), return a DataFrame.

True
dataframe_type Literal['pandas', 'polars']

"pandas" (default) or "polars".

'pandas'

Returns:

Type Description
DataFrame | dict | list

A DataFrame of available versions when simplify is True, or the raw dict/list otherwise.

Example

import povineq
df = povineq.get_versions()

Source code in src/povineq/info.py
def get_versions(
    api_version: str = API_VERSION,
    server: str | None = None,
    simplify: bool = True,
    dataframe_type: Literal["pandas", "polars"] = "pandas",
) -> pd.DataFrame | dict | list:
    """Query the versions endpoint for the available data versions.

    Python counterpart of ``pipr::get_versions()``.

    Args:
        api_version: API version (only ``"v1"`` currently).
        server: Server target.
        simplify: If ``True`` (default), return a DataFrame.
        dataframe_type: ``"pandas"`` (default) or ``"polars"``.

    Returns:
        Whatever ``parse_response`` produces for the given *simplify* and
        *dataframe_type*: a DataFrame of available versions by default.

    Example:
        >>> import povineq
        >>> df = povineq.get_versions()
    """
    logger.debug("get_versions()")
    response = build_and_execute(
        ENDPOINT_VERSIONS,
        {},
        server=server,
        api_version=api_version,
    )
    parsed = parse_response(response, simplify=simplify, dataframe_type=dataframe_type)
    return parsed

get_agg(year='all', povline=None, version=None, ppp_version=None, release_version=None, aggregate=None, api_version=API_VERSION, fmt='json', simplify=True, server=None, dataframe_type='pandas')

Get custom aggregate statistics (FCV, regional, vintage, etc.).

Mirrors pipr::get_agg().

Parameters:

Name Type Description Default
year str | int | list[int]

Year(s) or "all".

'all'
povline float | None

Poverty line in 2017 PPP USD per day.

None
version str | None

Data version string.

None
ppp_version int | None

PPP base year.

None
release_version str | None

Release date in YYYYMMDD format.

None
aggregate str | None

Aggregate name (e.g. "fcv").

None
api_version str

API version.

API_VERSION
fmt str

Response format — "json" (default) or "csv".

'json'
simplify bool

If True (default), return a DataFrame.

True
server str | None

Server target.

None
dataframe_type Literal['pandas', 'polars']

"pandas" or "polars".

'pandas'

Returns:

Type Description
DataFrame | PIPResponse

A DataFrame of custom aggregate statistics.

Example

import povineq
df = povineq.get_agg(aggregate="fcv", server="qa")

Source code in src/povineq/stats.py
def get_agg(
    year: str | int | list[int] = "all",
    povline: float | None = None,
    version: str | None = None,
    ppp_version: int | None = None,
    release_version: str | None = None,
    aggregate: str | None = None,
    api_version: str = API_VERSION,
    fmt: str = "json",
    simplify: bool = True,
    server: str | None = None,
    dataframe_type: Literal["pandas", "polars"] = "pandas",
) -> pd.DataFrame | PIPResponse:
    """Fetch custom aggregate statistics (FCV, regional, vintage, etc.).

    Python counterpart of ``pipr::get_agg()``. The arguments are passed
    through ``AggParams``, turned into query parameters, and sent to the
    grouped-statistics endpoint (``ENDPOINT_PIP_GRP``).

    Args:
        year: One year, a list of years, or ``"all"``.
        povline: Poverty line in 2017 PPP USD per day.
        version: Data version string.
        ppp_version: PPP base year.
        release_version: Release date formatted as ``YYYYMMDD``.
        aggregate: Name of the aggregate (e.g. ``"fcv"``).
        api_version: API version.
        fmt: Response format, ``"json"`` (default) or ``"csv"``.
        simplify: When ``True`` (default), return a DataFrame.
        server: Server target.
        dataframe_type: ``"pandas"`` or ``"polars"``.

    Returns:
        A DataFrame of custom aggregate statistics.

    Example:
        >>> import povineq
        >>> df = povineq.get_agg(aggregate="fcv", server="qa")
    """
    query = AggParams(
        year=year,
        povline=povline,
        version=version,
        ppp_version=ppp_version,
        release_version=release_version,
        aggregate=aggregate,
        api_version=api_version,
        format=fmt,
    ).to_query_params()

    # ``api_version`` is handed to ``build_and_execute`` as its own argument,
    # so it must not also appear among the query fields.
    query.pop("api_version", None)

    raw = build_and_execute(
        ENDPOINT_PIP_GRP, query, server=server, api_version=api_version
    )
    return parse_response(raw, simplify=simplify, dataframe_type=dataframe_type)

get_stats(country='all', year='all', povline=None, popshare=None, fill_gaps=False, nowcast=False, subgroup=None, welfare_type='all', reporting_level='all', version=None, ppp_version=None, release_version=None, api_version=API_VERSION, fmt='arrow', simplify=True, server=None, dataframe_type='pandas')

Get poverty and inequality statistics from the PIP API.

This is the primary function for querying household survey-based poverty and inequality estimates. It mirrors pipr::get_stats().

Parameters:

Name Type Description Default
country str | list[str]

ISO3 country code(s) or "all".

'all'
year str | int | list[int]

Survey year(s) or "all".

'all'
povline float | None

Poverty line in 2017 PPP USD per day.

None
popshare float | None

Proportion of the population below the poverty line. When set, povline is ignored.

None
fill_gaps bool

If True, interpolate/extrapolate values for years without survey data.

False
nowcast bool

If True, include nowcast estimates (implies fill_gaps=True).

False
subgroup str | None

Pre-defined aggregation. Either "wb_regions" or "none". When set, routes to the pip-grp endpoint.

None
welfare_type str

Welfare concept — "all", "income", or "consumption".

'all'
reporting_level str

Geographic level — "all", "national", "urban", or "rural".

'all'
version str | None

Data version string (see povineq.info.get_versions()).

None
ppp_version int | None

PPP base year.

None
release_version str | None

Release date in YYYYMMDD format.

None
api_version str

API version (only "v1" currently).

API_VERSION
fmt str

Response format — "arrow" (default), "json", or "csv".

'arrow'
simplify bool

If True (default), return a DataFrame. If False, return a povineq._response.PIPResponse wrapper.

True
server str | None

Server target — None/"prod", "qa", or "dev".

None
dataframe_type Literal['pandas', 'polars']

"pandas" (default) or "polars".

'pandas'

Returns:

Type Description
DataFrame | PIPResponse

A pandas.DataFrame when simplify is True, or a povineq._response.PIPResponse when simplify is False.

Raises:

Type Description
PIPValidationError

If parameter values are invalid.

PIPAPIError

If the API returns a structured error response.

PIPRateLimitError

If the rate limit is exceeded after retries.

PIPConnectionError

If the network is unreachable.

Example

>>> import povineq
>>> df = povineq.get_stats(country="AGO", year=2000)
>>> df = povineq.get_stats(country="all", year="all", fill_gaps=True)
>>> df = povineq.get_stats(country="all", subgroup="wb_regions")

Source code in src/povineq/stats.py
def get_stats(
    country: str | list[str] = "all",
    year: str | int | list[int] = "all",
    povline: float | None = None,
    popshare: float | None = None,
    fill_gaps: bool = False,
    nowcast: bool = False,
    subgroup: str | None = None,
    welfare_type: str = "all",
    reporting_level: str = "all",
    version: str | None = None,
    ppp_version: int | None = None,
    release_version: str | None = None,
    api_version: str = API_VERSION,
    fmt: str = "arrow",
    simplify: bool = True,
    server: str | None = None,
    dataframe_type: Literal["pandas", "polars"] = "pandas",
) -> pd.DataFrame | PIPResponse:
    """Query poverty and inequality statistics from the PIP API.

    Main entry point for household survey-based poverty and inequality
    estimates; the Python counterpart of ``pipr::get_stats()``.

    Args:
        country: ISO3 country code(s) or ``"all"``.
        year: Survey year(s) or ``"all"``.
        povline: Poverty line in 2017 PPP USD per day.
        popshare: Proportion of the population below the poverty line.
            When set, *povline* is ignored.
        fill_gaps: If ``True``, interpolate/extrapolate values for years
            without survey data.
        nowcast: If ``True``, include nowcast estimates (implies
            ``fill_gaps=True``). When ``False``, rows whose
            ``estimate_type`` contains ``"nowcast"`` are removed from a
            simplified pandas result.
        subgroup: Pre-defined aggregation, either ``"wb_regions"`` or
            ``"none"``. When set, the request goes to the ``pip-grp``
            endpoint and the value is sent as ``group_by``
            (``"wb_regions"`` is sent as ``"wb"``).
        welfare_type: Welfare concept: ``"all"``, ``"income"``, or
            ``"consumption"``.
        reporting_level: Geographic level: ``"all"``, ``"national"``,
            ``"urban"``, or ``"rural"``.
        version: Data version string (see :func:`~povineq.info.get_versions`).
        ppp_version: PPP base year.
        release_version: Release date in ``YYYYMMDD`` format.
        api_version: API version (only ``"v1"`` currently).
        fmt: Response format: ``"arrow"`` (default), ``"json"``, or
            ``"csv"``.
        simplify: If ``True`` (default), return a DataFrame. If ``False``,
            return a :class:`~povineq._response.PIPResponse` wrapper.
        server: Server target: ``None``/``"prod"``, ``"qa"``, or ``"dev"``.
        dataframe_type: ``"pandas"`` (default) or ``"polars"``.

    Returns:
        A :class:`~pandas.DataFrame` when *simplify* is ``True``, or a
        :class:`~povineq._response.PIPResponse` when *simplify* is ``False``.

    Raises:
        PIPValidationError: If parameter values are invalid.
        PIPAPIError: If the API returns a structured error response.
        PIPRateLimitError: If the rate limit is exceeded after retries.
        PIPConnectionError: If the network is unreachable.

    Example:
        >>> import povineq
        >>> df = povineq.get_stats(country="AGO", year=2000)
        >>> df = povineq.get_stats(country="all", year="all", fill_gaps=True)
        >>> df = povineq.get_stats(country="all", subgroup="wb_regions")
    """
    logger.debug(
        "get_stats",
        country=country,
        year=year,
        povline=povline,
        popshare=popshare,
        fill_gaps=fill_gaps,
        nowcast=nowcast,
        subgroup=subgroup,
    )

    # The pydantic model validates the inputs and applies the business rules.
    params = StatsParams(
        country=country,
        year=year,
        povline=povline,
        popshare=popshare,
        fill_gaps=fill_gaps,
        nowcast=nowcast,
        subgroup=subgroup,
        welfare_type=welfare_type,
        reporting_level=reporting_level,
        version=version,
        ppp_version=ppp_version,
        release_version=release_version,
        api_version=api_version,
        format=fmt,
    )

    # A subgroup request is served by the grouped endpoint and carries a
    # ``group_by`` value; everything else goes to the plain endpoint.
    if params.subgroup is None:
        endpoint, group_by = ENDPOINT_PIP, None
    elif params.subgroup == "wb_regions":
        endpoint, group_by = ENDPOINT_PIP_GRP, "wb"
    else:
        endpoint, group_by = ENDPOINT_PIP_GRP, params.subgroup

    # ``subgroup`` is replaced by ``group_by`` and ``nowcast`` is not an API
    # query parameter, so neither is forwarded.
    query = params.to_query_params()
    for local_only in ("subgroup", "nowcast"):
        query.pop(local_only, None)
    if group_by is not None:
        query["group_by"] = group_by

    response = build_and_execute(endpoint, query, server=server, api_version=api_version)
    out = parse_response(response, simplify=simplify, dataframe_type=dataframe_type)

    # Unless nowcast estimates were requested, strip rows flagged as nowcast.
    # This only applies to simplified pandas results that actually carry an
    # ``estimate_type`` column; any other result is returned untouched.
    wants_filter = params.nowcast is False and simplify and isinstance(out, pd.DataFrame)
    if wants_filter and "estimate_type" in out.columns:
        is_nowcast = out["estimate_type"].str.contains("nowcast", na=False)
        out = out[~is_nowcast].copy()

    return out

get_wb(year='all', povline=None, version=None, ppp_version=None, release_version=None, api_version=API_VERSION, fmt='json', simplify=True, server=None, dataframe_type='pandas')

Get World Bank regional and global aggregate statistics.

Shorthand for get_stats(subgroup="wb_regions"). Mirrors pipr::get_wb().

Parameters:

Name Type Description Default
year str | int | list[int]

Year(s) or "all".

'all'
povline float | None

Poverty line in 2017 PPP USD per day.

None
version str | None

Data version string.

None
ppp_version int | None

PPP base year.

None
release_version str | None

Release date in YYYYMMDD format.

None
api_version str

API version.

API_VERSION
fmt str

Response format — "json" (default) or "csv".

'json'
simplify bool

If True (default), return a DataFrame.

True
server str | None

Server target.

None
dataframe_type Literal['pandas', 'polars']

"pandas" or "polars".

'pandas'

Returns:

Type Description
DataFrame | PIPResponse

A DataFrame of WB regional/global aggregates.

Example

>>> import povineq
>>> df = povineq.get_wb()

Source code in src/povineq/stats.py
def get_wb(
    year: str | int | list[int] = "all",
    povline: float | None = None,
    version: str | None = None,
    ppp_version: int | None = None,
    release_version: str | None = None,
    api_version: str = API_VERSION,
    fmt: str = "json",
    simplify: bool = True,
    server: str | None = None,
    dataframe_type: Literal["pandas", "polars"] = "pandas",
) -> pd.DataFrame | PIPResponse:
    """Fetch World Bank regional and global aggregate statistics.

    Shorthand for ``get_stats(subgroup="wb_regions")`` and the Python
    counterpart of ``pipr::get_wb()``. The query is assembled here directly
    and always carries ``group_by="wb"``.

    Args:
        year: One year, a list of years, or ``"all"``. A list is sent as a
            comma-separated string.
        povline: Poverty line in 2017 PPP USD per day.
        version: Data version string.
        ppp_version: PPP base year.
        release_version: Release date formatted as ``YYYYMMDD``.
        api_version: API version.
        fmt: Response format, ``"json"`` (default) or ``"csv"``.
        simplify: When ``True`` (default), return a DataFrame.
        server: Server target.
        dataframe_type: ``"pandas"`` or ``"polars"``.

    Returns:
        A DataFrame of WB regional/global aggregates.

    Example:
        >>> import povineq
        >>> df = povineq.get_wb()
    """
    # Serialise the year selection: lists are comma-joined, scalars stringified.
    year_text = "all"
    if year != "all":
        year_text = ",".join(map(str, year)) if isinstance(year, list) else str(year)

    query: dict[str, str] = {"year": year_text}

    # Optional filters are only sent when the caller supplied them.
    if povline is not None:
        query["povline"] = str(povline)
    if version is not None:
        query["version"] = version
    if ppp_version is not None:
        query["ppp_version"] = str(ppp_version)
    if release_version is not None:
        query["release_version"] = release_version

    # Fixed fields: the response format and the WB regional grouping.
    query.update(format=fmt, group_by="wb")

    raw = build_and_execute(
        ENDPOINT_PIP_GRP, query, server=server, api_version=api_version
    )
    return parse_response(raw, simplify=simplify, dataframe_type=dataframe_type)

change_grouped_stats_to_csv(df)

Expand a deciles list column into individual decile1--decileN columns.

Mirrors pipr::change_grouped_stats_to_csv(). When the PIP API returns grouped statistics in JSON or RDS format, the decile values are packed into a single "deciles" column whose cells contain lists. This function unpacks those lists into separate columns and drops the original list column.

If the DataFrame has no "deciles" column, it is returned unchanged.

Parameters:

Name Type Description Default
df DataFrame

DataFrame potentially containing a "deciles" list-column.

required

Returns:

Type Description
DataFrame

DataFrame with individual "decile1", "decile2", … columns replacing the "deciles" column.

Example

>>> import pandas as pd
>>> from povineq.utils import change_grouped_stats_to_csv
>>> df = pd.DataFrame({"country": ["ALB"], "deciles": [[0.1, 0.2]]})
>>> change_grouped_stats_to_csv(df)
  country  decile1  decile2
0     ALB      0.1      0.2

Source code in src/povineq/utils.py
def change_grouped_stats_to_csv(df: pd.DataFrame) -> pd.DataFrame:
    """Spread a ``deciles`` list column over ``decile1``--``decileN`` columns.

    Mirrors ``pipr::change_grouped_stats_to_csv()``. When the PIP API returns
    grouped statistics in JSON or RDS format, the decile values are packed into
    a single ``"deciles"`` column whose cells contain lists. This function
    unpacks those lists into one column per position and removes the packed
    column.

    Only ``list`` and ``tuple`` cells count as decile containers. Any other
    cell yields missing values in its row, and if no cell qualifies the
    ``"deciles"`` column is simply dropped. A DataFrame without a
    ``"deciles"`` column is returned as-is (the same object).

    Args:
        df: DataFrame potentially containing a ``"deciles"`` list-column.

    Returns:
        DataFrame with individual ``"decile1"``, ``"decile2"``, … columns
        replacing the ``"deciles"`` column.

    Raises:
        ValueError: If the list/tuple cells do not all have the same length.

    Example:
        >>> import pandas as pd
        >>> from povineq.utils import change_grouped_stats_to_csv
        >>> df = pd.DataFrame({"country": ["ALB"], "deciles": [[0.1, 0.2]]})
        >>> change_grouped_stats_to_csv(df)
          country  decile1  decile2
        0     ALB      0.1      0.2
    """
    if "deciles" not in df.columns:
        return df

    def _holds_deciles(cell: object) -> bool:
        # Strings are deliberately excluded: iterating one would turn each
        # character into a bogus "decile value".
        return isinstance(cell, (list, tuple))

    cells = df["deciles"]
    container_mask = cells.map(_holds_deciles)
    container_sizes = cells[container_mask].map(len)

    # Nothing to unpack: just remove the packed column.
    if container_sizes.empty:
        return df.drop(columns=["deciles"])

    # Mixed lengths would give ragged rows padded with None, so refuse them.
    if container_sizes.nunique() != 1:
        raise ValueError(
            "Rows in the 'deciles' column have different list lengths "
            f"({sorted(container_sizes.unique().tolist())}). Cannot pivot to columns."
        )

    width = int(container_sizes.iloc[0])

    # One output row per input row; non-container cells become all-missing.
    unpacked_rows = [
        list(cell) if _holds_deciles(cell) else [None] * width for cell in cells
    ]
    decile_frame = pd.DataFrame(unpacked_rows, index=df.index)
    decile_frame = decile_frame.rename(columns=lambda pos: f"decile{pos + 1}")

    remaining = df.drop(columns=["deciles"])
    return pd.concat([remaining, decile_frame], axis=1)