%matplotlib inline
%reload_ext autoreload
%autoreload 2
Malaysia Evaluation
import sys
"../../../")
sys.path.append(import os
from povertymapping import settings
from povertymapping.rollout_grids import get_region_filtered_bingtile_grids, compute_raster_stats
from povertymapping.hdx import get_hdx_file
from povertymapping.iso3 import get_iso3_code
from povertymapping.geoboundaries import get_geoboundaries
import geowrangler.vector_zonal_stats as vzs
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import spearmanr
Set country-specific parameters
= 'malaysia'
REGION = 'ADM2'
ADMIN_LVL = 14
ZOOM_LEVEL = 8
GROUP_ZOOM_LEVEL = get_iso3_code(REGION, code='alpha-2').lower() country_code
Read bounds
= get_geoboundaries(region=REGION, adm='ADM1')
bounds_file = gpd.read_file(bounds_file)
bounds_gdf = bounds_gdf.sort_values('shapeName') bounds_gdf
'shapeName')#.explore() bounds_gdf.sort_values(
shapeName | shapeISO | shapeID | shapeGroup | shapeType | geometry | |
---|---|---|---|---|---|---|
1 | Johor | MY-01 | MYS-ADM1-3_0_0-B2 | MYS | ADM1 | MULTIPOLYGON (((104.06240 2.21794, 104.06217 2... |
11 | Kedah | MY-02 | MYS-ADM1-3_0_0-B12 | MYS | ADM1 | MULTIPOLYGON (((99.72140 6.20837, 99.72192 6.2... |
7 | Kelantan | MY-03 | MYS-ADM1-3_0_0-B8 | MYS | ADM1 | MULTIPOLYGON (((101.33398 4.69564, 101.33467 4... |
2 | Kuala Lumpur | MY-14 | MYS-ADM1-3_0_0-B3 | MYS | ADM1 | MULTIPOLYGON (((101.61508 3.15348, 101.61527 3... |
15 | Labuan | MY-15 | MYS-ADM1-3_0_0-B16 | MYS | ADM1 | MULTIPOLYGON (((115.13150 5.21699, 115.13119 5... |
3 | Malacca | MY-04 | MYS-ADM1-3_0_0-B4 | MYS | ADM1 | MULTIPOLYGON (((101.85494 2.41747, 101.86035 2... |
4 | Negeri Sembilan | MY-05 | MYS-ADM1-3_0_0-B5 | MYS | ADM1 | MULTIPOLYGON (((101.71245 2.59533, 101.71259 2... |
10 | Pahang | MY-06 | MYS-ADM1-3_0_0-B11 | MYS | ADM1 | MULTIPOLYGON (((101.33463 4.43296, 101.35839 4... |
14 | Penang | MY-07 | MYS-ADM1-3_0_0-B15 | MYS | ADM1 | MULTIPOLYGON (((100.33822 5.57470, 100.33901 5... |
13 | Perak | MY-08 | MYS-ADM1-3_0_0-B14 | MYS | ADM1 | MULTIPOLYGON (((100.63654 4.15716, 100.63688 4... |
12 | Perlis | MY-09 | MYS-ADM1-3_0_0-B13 | MYS | ADM1 | MULTIPOLYGON (((100.12039 6.42274, 100.12113 6... |
8 | Putrajaya | MY-16 | MYS-ADM1-3_0_0-B9 | MYS | ADM1 | MULTIPOLYGON (((101.65969 2.89914, 101.66036 2... |
5 | Sabah | MY-12 | MYS-ADM1-3_0_0-B6 | MYS | ADM1 | MULTIPOLYGON (((117.62919 4.20156, 117.63091 4... |
6 | Sarawak | MY-13 | MYS-ADM1-3_0_0-B7 | MYS | ADM1 | MULTIPOLYGON (((110.29745 1.66348, 110.29677 1... |
0 | Selangor | MY-10 | MYS-ADM1-3_0_0-B1 | MYS | ADM1 | MULTIPOLYGON (((101.24990 2.90772, 101.24699 2... |
9 | Terengganu | MY-11 | MYS-ADM1-3_0_0-B10 | MYS | ADM1 | MULTIPOLYGON (((103.67313 4.80327, 103.67341 4... |
#bounds_gdf[bounds_gdf.columns[:-1]].to_csv("./my-hes-2020.csv")
Read reference data
= pd.read_csv("./my-hes-2016.csv")
data_df = bounds_gdf.merge(data_df, left_on='shapeName', right_on='province_name', how='left')
data_df 'rank'] = data_df['mean_total_expenditure_2016'].rank(ascending=False)
data_df[ data_df
shapeName | shapeISO | shapeID | shapeGroup | shapeType | geometry | province_name | mean_total_expenditure_2016 | rank | |
---|---|---|---|---|---|---|---|---|---|
0 | Johor | MY-01 | MYS-ADM1-3_0_0-B2 | MYS | ADM1 | MULTIPOLYGON (((104.06240 2.21794, 104.06217 2... | Johor | 4148.38 | 6.0 |
1 | Kedah | MY-02 | MYS-ADM1-3_0_0-B12 | MYS | ADM1 | MULTIPOLYGON (((99.72140 6.20837, 99.72192 6.2... | Kedah | 3061.06 | 14.0 |
2 | Kelantan | MY-03 | MYS-ADM1-3_0_0-B8 | MYS | ADM1 | MULTIPOLYGON (((101.33398 4.69564, 101.33467 4... | Kelantan | 2874.63 | 15.0 |
3 | Kuala Lumpur | MY-14 | MYS-ADM1-3_0_0-B3 | MYS | ADM1 | MULTIPOLYGON (((101.61508 3.15348, 101.61527 3... | Kuala Lumpur | 6214.21 | 2.0 |
4 | Labuan | MY-15 | MYS-ADM1-3_0_0-B16 | MYS | ADM1 | MULTIPOLYGON (((115.13150 5.21699, 115.13119 5... | Labuan | 4068.75 | 7.0 |
5 | Malacca | MY-04 | MYS-ADM1-3_0_0-B4 | MYS | ADM1 | MULTIPOLYGON (((101.85494 2.41747, 101.86035 2... | Malacca | 4373.80 | 4.0 |
6 | Negeri Sembilan | MY-05 | MYS-ADM1-3_0_0-B5 | MYS | ADM1 | MULTIPOLYGON (((101.71245 2.59533, 101.71259 2... | Negeri Sembilan | 3688.39 | 9.0 |
7 | Pahang | MY-06 | MYS-ADM1-3_0_0-B11 | MYS | ADM1 | MULTIPOLYGON (((101.33463 4.43296, 101.35839 4... | Pahang | 3316.16 | 10.0 |
8 | Penang | MY-07 | MYS-ADM1-3_0_0-B15 | MYS | ADM1 | MULTIPOLYGON (((100.33822 5.57470, 100.33901 5... | Penang | 4190.30 | 5.0 |
9 | Perak | MY-08 | MYS-ADM1-3_0_0-B14 | MYS | ADM1 | MULTIPOLYGON (((100.63654 4.15716, 100.63688 4... | Perak | 3233.28 | 11.0 |
10 | Perlis | MY-09 | MYS-ADM1-3_0_0-B13 | MYS | ADM1 | MULTIPOLYGON (((100.12039 6.42274, 100.12113 6... | Perlis | 3084.92 | 13.0 |
11 | Putrajaya | MY-16 | MYS-ADM1-3_0_0-B9 | MYS | ADM1 | MULTIPOLYGON (((101.65969 2.89914, 101.66036 2... | Putrajaya | 6970.93 | 1.0 |
12 | Sabah | MY-12 | MYS-ADM1-3_0_0-B6 | MYS | ADM1 | MULTIPOLYGON (((117.62919 4.20156, 117.63091 4... | Sabah | 2594.99 | 16.0 |
13 | Sarawak | MY-13 | MYS-ADM1-3_0_0-B7 | MYS | ADM1 | MULTIPOLYGON (((110.29745 1.66348, 110.29677 1... | Sarawak | 3118.34 | 12.0 |
14 | Selangor | MY-10 | MYS-ADM1-3_0_0-B1 | MYS | ADM1 | MULTIPOLYGON (((101.24990 2.90772, 101.24699 2... | Selangor | 5182.70 | 3.0 |
15 | Terengganu | MY-11 | MYS-ADM1-3_0_0-B10 | MYS | ADM1 | MULTIPOLYGON (((103.67313 4.80327, 103.67341 4... | Terengganu | 3802.22 | 8.0 |
Read and process model output
= gpd.read_file(settings.ROOT_DIR/"notebooks/2023-02-21-cross-country-rollouts/my/2023-02-21-my-rollout-output.geojson")
out_gdf #explore(column='Predicted Relative Wealth Index') out_gdf.head()
quadkey | shapeName | shapeISO | shapeID | shapeGroup | shapeType | pop_count | Predicted Relative Wealth Index | Predicted Wealth Category (quintile) | geometry | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 13223202222021 | Klang | None | MYS-ADM2-3_0_0-B57 | MYS | ADM2 | 1948.905696 | 0.372511 | B | POLYGON ((101.27197 2.92110, 101.27197 2.94304... |
1 | 13223202222023 | Klang | None | MYS-ADM2-3_0_0-B57 | MYS | ADM2 | 5633.984424 | 0.401336 | B | POLYGON ((101.27197 2.89915, 101.27197 2.92110... |
2 | 13223202222201 | Klang | None | MYS-ADM2-3_0_0-B57 | MYS | ADM2 | 919.554096 | 0.338976 | C | POLYGON ((101.27197 2.87721, 101.27197 2.89915... |
3 | 13223202222010 | Klang | None | MYS-ADM2-3_0_0-B57 | MYS | ADM2 | 61.761096 | 0.330703 | C | POLYGON ((101.29395 2.96498, 101.29395 2.98693... |
4 | 13223202222012 | Klang | None | MYS-ADM2-3_0_0-B57 | MYS | ADM2 | 4659.531576 | 0.441108 | A | POLYGON ((101.29395 2.94304, 101.29395 2.96498... |
# get stats for provinces (ADM1)
= vzs.create_zonal_stats(
out_gdf
bounds_gdf,
out_gdf,="intersects",
overlap_method=[{"column":'Predicted Relative Wealth Index', "func": "mean", "output":"model_wealth_index_mean"}]
aggregations
)'rank'] = out_gdf["model_wealth_index_mean"].rank(ascending=False)
out_gdf[ out_gdf
shapeName | shapeISO | shapeID | shapeGroup | shapeType | geometry | model_wealth_index_mean | rank | |
---|---|---|---|---|---|---|---|---|
1 | Johor | MY-01 | MYS-ADM1-3_0_0-B2 | MYS | ADM1 | MULTIPOLYGON (((104.06240 2.21794, 104.06217 2... | 0.395997 | 10.0 |
11 | Kedah | MY-02 | MYS-ADM1-3_0_0-B12 | MYS | ADM1 | MULTIPOLYGON (((99.72140 6.20837, 99.72192 6.2... | 0.408305 | 8.0 |
7 | Kelantan | MY-03 | MYS-ADM1-3_0_0-B8 | MYS | ADM1 | MULTIPOLYGON (((101.33398 4.69564, 101.33467 4... | 0.345490 | 13.0 |
2 | Kuala Lumpur | MY-14 | MYS-ADM1-3_0_0-B3 | MYS | ADM1 | MULTIPOLYGON (((101.61508 3.15348, 101.61527 3... | 0.621844 | 1.0 |
15 | Labuan | MY-15 | MYS-ADM1-3_0_0-B16 | MYS | ADM1 | MULTIPOLYGON (((115.13150 5.21699, 115.13119 5... | 0.495699 | 4.0 |
3 | Malacca | MY-04 | MYS-ADM1-3_0_0-B4 | MYS | ADM1 | MULTIPOLYGON (((101.85494 2.41747, 101.86035 2... | 0.453636 | 6.0 |
4 | Negeri Sembilan | MY-05 | MYS-ADM1-3_0_0-B5 | MYS | ADM1 | MULTIPOLYGON (((101.71245 2.59533, 101.71259 2... | 0.402734 | 9.0 |
10 | Pahang | MY-06 | MYS-ADM1-3_0_0-B11 | MYS | ADM1 | MULTIPOLYGON (((101.33463 4.43296, 101.35839 4... | 0.343607 | 14.0 |
14 | Penang | MY-07 | MYS-ADM1-3_0_0-B15 | MYS | ADM1 | MULTIPOLYGON (((100.33822 5.57470, 100.33901 5... | 0.531171 | 3.0 |
13 | Perak | MY-08 | MYS-ADM1-3_0_0-B14 | MYS | ADM1 | MULTIPOLYGON (((100.63654 4.15716, 100.63688 4... | 0.384559 | 11.0 |
12 | Perlis | MY-09 | MYS-ADM1-3_0_0-B13 | MYS | ADM1 | MULTIPOLYGON (((100.12039 6.42274, 100.12113 6... | 0.433818 | 7.0 |
8 | Putrajaya | MY-16 | MYS-ADM1-3_0_0-B9 | MYS | ADM1 | MULTIPOLYGON (((101.65969 2.89914, 101.66036 2... | 0.575962 | 2.0 |
5 | Sabah | MY-12 | MYS-ADM1-3_0_0-B6 | MYS | ADM1 | MULTIPOLYGON (((117.62919 4.20156, 117.63091 4... | 0.303807 | 15.0 |
6 | Sarawak | MY-13 | MYS-ADM1-3_0_0-B7 | MYS | ADM1 | MULTIPOLYGON (((110.29745 1.66348, 110.29677 1... | 0.290110 | 16.0 |
0 | Selangor | MY-10 | MYS-ADM1-3_0_0-B1 | MYS | ADM1 | MULTIPOLYGON (((101.24990 2.90772, 101.24699 2... | 0.467446 | 5.0 |
9 | Terengganu | MY-11 | MYS-ADM1-3_0_0-B10 | MYS | ADM1 | MULTIPOLYGON (((103.67313 4.80327, 103.67341 4... | 0.368482 | 12.0 |
Compare model output vs reference
= out_gdf[['shapeISO','shapeName','model_wealth_index_mean','rank', 'geometry']].merge(\
compare_gdf 'shapeISO','shapeName','mean_total_expenditure_2016','rank']], on='shapeISO', how='outer', suffixes=['_reference', '_model']).dropna()
data_df[[ compare_gdf
shapeISO | shapeName_reference | model_wealth_index_mean | rank_reference | geometry | shapeName_model | mean_total_expenditure_2016 | rank_model | |
---|---|---|---|---|---|---|---|---|
0 | MY-01 | Johor | 0.395997 | 10.0 | MULTIPOLYGON (((104.06240 2.21794, 104.06217 2... | Johor | 4148.38 | 6.0 |
1 | MY-02 | Kedah | 0.408305 | 8.0 | MULTIPOLYGON (((99.72140 6.20837, 99.72192 6.2... | Kedah | 3061.06 | 14.0 |
2 | MY-03 | Kelantan | 0.345490 | 13.0 | MULTIPOLYGON (((101.33398 4.69564, 101.33467 4... | Kelantan | 2874.63 | 15.0 |
3 | MY-14 | Kuala Lumpur | 0.621844 | 1.0 | MULTIPOLYGON (((101.61508 3.15348, 101.61527 3... | Kuala Lumpur | 6214.21 | 2.0 |
4 | MY-15 | Labuan | 0.495699 | 4.0 | MULTIPOLYGON (((115.13150 5.21699, 115.13119 5... | Labuan | 4068.75 | 7.0 |
5 | MY-04 | Malacca | 0.453636 | 6.0 | MULTIPOLYGON (((101.85494 2.41747, 101.86035 2... | Malacca | 4373.80 | 4.0 |
6 | MY-05 | Negeri Sembilan | 0.402734 | 9.0 | MULTIPOLYGON (((101.71245 2.59533, 101.71259 2... | Negeri Sembilan | 3688.39 | 9.0 |
7 | MY-06 | Pahang | 0.343607 | 14.0 | MULTIPOLYGON (((101.33463 4.43296, 101.35839 4... | Pahang | 3316.16 | 10.0 |
8 | MY-07 | Penang | 0.531171 | 3.0 | MULTIPOLYGON (((100.33822 5.57470, 100.33901 5... | Penang | 4190.30 | 5.0 |
9 | MY-08 | Perak | 0.384559 | 11.0 | MULTIPOLYGON (((100.63654 4.15716, 100.63688 4... | Perak | 3233.28 | 11.0 |
10 | MY-09 | Perlis | 0.433818 | 7.0 | MULTIPOLYGON (((100.12039 6.42274, 100.12113 6... | Perlis | 3084.92 | 13.0 |
11 | MY-16 | Putrajaya | 0.575962 | 2.0 | MULTIPOLYGON (((101.65969 2.89914, 101.66036 2... | Putrajaya | 6970.93 | 1.0 |
12 | MY-12 | Sabah | 0.303807 | 15.0 | MULTIPOLYGON (((117.62919 4.20156, 117.63091 4... | Sabah | 2594.99 | 16.0 |
13 | MY-13 | Sarawak | 0.290110 | 16.0 | MULTIPOLYGON (((110.29745 1.66348, 110.29677 1... | Sarawak | 3118.34 | 12.0 |
14 | MY-10 | Selangor | 0.467446 | 5.0 | MULTIPOLYGON (((101.24990 2.90772, 101.24699 2... | Selangor | 5182.70 | 3.0 |
15 | MY-11 | Terengganu | 0.368482 | 12.0 | MULTIPOLYGON (((103.67313 4.80327, 103.67341 4... | Terengganu | 3802.22 | 8.0 |
= spearmanr(compare_gdf['mean_total_expenditure_2016'], compare_gdf['model_wealth_index_mean'])
coef, p coef
0.7588235294117647
Heatmap
def despine(ax):
"right"].set_visible(False)
ax.spines["top"].set_visible(False)
ax.spines["left"].set_visible(False)
ax.spines["bottom"].set_visible(False)
ax.spines[
ax.set_xticks([]) ax.set_yticks([])
= plt.subplots(1,2, figsize=(9,5))
fig, axs
compare_gdf.plot(='rank_reference',
column=axs[0],
ax="RdYlGn_r",
cmap=True,
legend={"shrink": 0.3},
legend_kwds
)0])
despine(axs[
compare_gdf.plot(='rank_model',
column=axs[1],
ax="RdYlGn_r",
cmap=True,
legend={"shrink": 0.3},
legend_kwds
)1])
despine(axs[0].set_title('Reference')
axs[1].set_title('Predicted')
axs[# leg1 = axs[0].get_legend()
# leg1.set_title("Rank")
# leg2 = axs[1].get_legend()
# leg2.set_title("Rank")#
#fig.suptitle('Wealth index rank')
="my_rank_comparison.png", dpi=300)
plt.savefig(fname
fig.tight_layout()