Generate rollout grids

%matplotlib inline
%reload_ext autoreload
%autoreload 2
import sys

sys.path.append("../../../")
import os
from pathlib import Path

import geopandas as gpd
import pandas as pd
import requests

from povertymapping import settings
from povertymapping.iso3 import get_region_name
from povertymapping.rollout_grids import (
    compute_raster_stats,
    get_region_filtered_bingtile_grids,
)

Generate Rollout Grids: Myanmar

This notebook generates the rollout grid tiles over the country. The output file is used as an input for Step 3, where we will run the trained model over the set of grids.

The generated grids are set at 2.4km (zoom level 14), matching the grids used during model training. The grids are also filtered to only include populated areas based on a gridded population data.

Notes on Myanmar

As of time of writing, HRSL population data for Myanmar is not available in the HDX site, which will cause hdx.get_hdx_file to fail. We use population data downloaded from WorldPop instead to filter the grids.

# This auto-downloads the data
pop_file = settings.DATA_DIR / "worldpop/mm/mmr_ppp_2020_UNadj_constrained.tif"
url = "https://data.worldpop.org/GIS/Population/Global_2000_2020/2016/MMR/mmr_ppp_2020_UNadj_constrained.tif"


Path(pop_file).parent.mkdir(parents=True, exist_ok=True)
!wget -nc $url -O $pop_file 
File ‘/home/alron/unicef-ai4d-poverty-mapping/notebooks/2023-02-21-single-country-rollouts/mm/../../../data/worldpop/mm/mmr_ppp_2020_UNadj_constrained.tif’ already there; not retrieving.

Set country-specific parameters

COUNTRY_CODE = "mm"
REGION = get_region_name(COUNTRY_CODE, code="alpha-2").lower()
ADMIN_LVL = "ADM2"
ZOOM_LEVEL = 14
GROUP_ZOOM_LEVEL = 10

Generate Grids

admin_grids_gdf = get_region_filtered_bingtile_grids(
    REGION,
    admin_lvl=ADMIN_LVL,
    quadkey_lvl=ZOOM_LEVEL,
    use_cache=True,
    filter_population=False,  # We set this to False to not trigger an HDX download
)
2023-04-16 22:45:23.771 | INFO     | povertymapping.rollout_grids:get_region_filtered_bingtile_grids:260 - Loading cached grids file /home/alron/.cache/geowrangler/quadkey_grids/myanmar_14_ADM2_admin_grids.geojson

Compute Population Per Grid

For this calculation, we will calculate the population, batched based on groups calculated from the quadkey. By getting the first n digits of the quadkey, we are able to get the zoom level n quadkey to which that tile belongs to.

Ex. if we group based on zoom level 10 the quadkey 31000101131223 belongs to the grouping 3100010113.

This grouping ensures that the tile groupings are geographically close to one another, which reduces the raster window size that we needed to calculate the population count.

For more information about quadkeys, see Bing Maps Tile System.

quadkey_group_col = f"quadkey_level{GROUP_ZOOM_LEVEL}"
admin_grids_gdf[quadkey_group_col] = admin_grids_gdf["quadkey"].str[:GROUP_ZOOM_LEVEL]
admin_grids_gdf.head(3)
quadkey shapeName shapeISO shapeID shapeGroup shapeType geometry quadkey_level10
0 13220212231203 Yangon (South) None MMR-ADM2-3_0_0-B72 MMR ADM2 POLYGON ((93.36182 14.13658, 93.36182 14.15788... 1322021223
1 13220212231221 Yangon (South) None MMR-ADM2-3_0_0-B72 MMR ADM2 POLYGON ((93.36182 14.11527, 93.36182 14.13658... 1322021223
2 13220212231223 Yangon (South) None MMR-ADM2-3_0_0-B72 MMR ADM2 POLYGON ((93.36182 14.09396, 93.36182 14.11527... 1322021223

Compute population per grid tile and filter to populated areas only

# For Worldpop data only: set aggregation settings
aggregation = dict(column="population", output="pop_count", func="sum")
extra_args = dict(nodata=-99999)
# For Worldpop data only: compute population totals per grid
admin_grids_pop_count = compute_raster_stats(
    admin_grids_gdf,
    pop_file,
    aggregation=aggregation,
    extra_args=extra_args,
    group_col=quadkey_group_col,
    max_batch_size=None,
    n_workers=2,
)
2023-04-16 22:45:34.028 | INFO     | povertymapping.rollout_grids:compute_raster_stats:72 - Creating raster zonal stats for 133637 grids for file size 8.619787 Mb, batched in 647 unique group/s from quadkey_level10
2023-04-16 22:45:34.029 | WARNING  | povertymapping.rollout_grids:compute_raster_stats:75 - When batching by group, output gdf rows will be ordered based on the group.
100%|██████████| 647/647 [03:11<00:00,  3.38it/s]
2023-04-16 22:48:45.623 | INFO     | povertymapping.rollout_grids:compute_raster_stats:90 - Completed raster zonal stats for 647 groups
2023-04-16 22:48:45.703 | INFO     | povertymapping.rollout_grids:compute_raster_stats:92 - Concatenated raster zonal stats for 647 groups
admin_grids_filtered = admin_grids_pop_count[admin_grids_pop_count["pop_count"] > 0]
admin_grids_filtered = admin_grids_filtered.reset_index(drop=True)
admin_grids_filtered.info()
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 27830 entries, 0 to 27829
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype   
---  ------           --------------  -----   
 0   quadkey          27830 non-null  object  
 1   shapeName        27830 non-null  object  
 2   shapeISO         27830 non-null  object  
 3   shapeID          27830 non-null  object  
 4   shapeGroup       27830 non-null  object  
 5   shapeType        27830 non-null  object  
 6   geometry         27830 non-null  geometry
 7   quadkey_level10  27830 non-null  object  
 8   pop_count        27830 non-null  float64 
dtypes: float64(1), geometry(1), object(7)
memory usage: 1.9+ MB

Explore Populated Grids

admin_grids_filtered.info()
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 27830 entries, 0 to 27829
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype   
---  ------           --------------  -----   
 0   quadkey          27830 non-null  object  
 1   shapeName        27830 non-null  object  
 2   shapeISO         27830 non-null  object  
 3   shapeID          27830 non-null  object  
 4   shapeGroup       27830 non-null  object  
 5   shapeType        27830 non-null  object  
 6   geometry         27830 non-null  geometry
 7   quadkey_level10  27830 non-null  object  
 8   pop_count        27830 non-null  float64 
dtypes: float64(1), geometry(1), object(7)
memory usage: 1.9+ MB
admin_grids_filtered.head(2)
quadkey shapeName shapeISO shapeID shapeGroup shapeType geometry quadkey_level10 pop_count
0 13220212231203 Yangon (South) None MMR-ADM2-3_0_0-B72 MMR ADM2 POLYGON ((93.36182 14.13658, 93.36182 14.15788... 1322021223 1646.340576
1 13220212231221 Yangon (South) None MMR-ADM2-3_0_0-B72 MMR ADM2 POLYGON ((93.36182 14.11527, 93.36182 14.13658... 1322021223 749.037048
# Uncomment to view interactive map
# admin_grids_filtered.explore()

Save to file

rollout_date = "-".join(os.getcwd().split("/")[-2].split("-")[:3])
grid_save_path = f"./{rollout_date}-{COUNTRY_CODE}-rollout-grids.geojson"
admin_grids_filtered.to_file(grid_save_path, driver="GeoJSON", index=False)