Skip to content

Commit

Permalink
fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
martinfleis committed Jan 10, 2024
1 parent cc11fc1 commit 0cb4ac3
Show file tree
Hide file tree
Showing 4 changed files with 156 additions and 33 deletions.
10 changes: 7 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,6 @@ __pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

Expand Down Expand Up @@ -217,4 +215,10 @@ rsconnect/

# quarto render
book/book/
_site/
_site/

code/04_generalisation/app/
code/04_generalisation/cache/
code/04_generalisation/engine/
code/04_generalisation/temp/

30 changes: 11 additions & 19 deletions code/04_generalisation/air_quality_model.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -59,15 +59,15 @@
"import matplotlib.pyplot as plt\n",
"\n",
"from libpysal import graph\n",
"from sklearn.ensemble import HistGradientBoostingRegressor\n",
"from sklearn.ensemble import HistGradientBoostingRegressor, RandomForestRegressor\n",
"from sklearn import metrics\n",
"from sklearn.model_selection import cross_val_predict, GridSearchCV"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "9bc08843-59bd-4436-aa01-36955c8d1eb6",
"id": "95d35d82-1821-4054-8b64-954b1323d58a",
"metadata": {
"tags": []
},
Expand All @@ -79,7 +79,7 @@
{
"attachments": {},
"cell_type": "markdown",
"id": "a48a4192-89a8-4802-909c-c53ae7111869",
"id": "a657bb20-465f-459a-9516-1d738fde7c94",
"metadata": {},
"source": [
"Load the data"
Expand Down Expand Up @@ -941,7 +941,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"id": "e4ac79b3-cf65-4678-8607-50d7e2101834",
"metadata": {},
"outputs": [],
Expand All @@ -951,7 +951,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 6,
"id": "a0b7b29c-b8fe-4502-a23c-3ee09573df3c",
"metadata": {},
"outputs": [],
Expand All @@ -963,24 +963,24 @@
"lags = pd.DataFrame(\n",
" np.vstack(lags).T, \n",
" index=data.index, \n",
" columns=exvars + \"_lag_binary\"\n",
" columns=exvars + \"_lag\"\n",
")\n",
"\n",
"coords = pd.DataFrame(data.index.to_series().apply(h3.h3_to_geo).tolist(), columns=[\"lat\", \"lon\"], index=data.index)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 7,
"id": "accb8fbd-583c-49f8-aaa2-ce49fcb01f18",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 7min 4s, sys: 836 ms, total: 7min 4s\n",
"Wall time: 58.8 s\n"
"CPU times: user 7min 13s, sys: 726 ms, total: 7min 13s\n",
"Wall time: 59.8 s\n"
]
},
{
Expand All @@ -992,7 +992,7 @@
"HistGradientBoostingRegressor(max_bins=128, max_iter=1000, random_state=0)"
]
},
"execution_count": 11,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -1017,22 +1017,14 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 8,
"id": "c973b472-0af2-4da2-b31e-a5748df82076",
"metadata": {},
"outputs": [],
"source": [
"with open(f\"{data_folder}/h3/air_quality_model.joblib\", \"wb\") as f:\n",
" joblib.dump(regressor_final, f, compress=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a769244a-a03d-47df-a63f-7cdbd4a15047",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
82 changes: 71 additions & 11 deletions code/04_generalisation/house_price_model.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 6,
"id": "adb02788-cc79-4396-9275-d1de355120c9",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -1228,7 +1228,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 7,
"id": "ba40c38b-04d2-4f64-9748-5bf6cc23e850",
"metadata": {},
"outputs": [],
Expand All @@ -1238,36 +1238,46 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 8,
"id": "198863bc-e731-440a-8ac7-db6d29b8b367",
"metadata": {},
"outputs": [],
"source": [
"matrix_clean = subset_graph(matrix, data.index)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "5550ad96-0d8b-4035-8ade-529f48d891cd",
"metadata": {},
"outputs": [],
"source": [
"lags = []\n",
"for var in exvars:\n",
" lags.append(matrix.lag(data[var]))\n",
" lags.append(matrix_clean.lag(data[var]))\n",
"\n",
"lags = pd.DataFrame(\n",
" np.vstack(lags).T, \n",
" index=data.index, \n",
" columns=exvars + \"_lag_binary\"\n",
" columns=exvars + \"_lag\"\n",
")\n",
"\n",
"coords = pd.DataFrame(data.index.to_series().apply(h3.h3_to_geo).tolist(), columns=[\"lat\", \"lon\"], index=data.index)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 10,
"id": "3e5e316f-ab7e-4dd1-a334-801c04c64e58",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 6min 38s, sys: 824 ms, total: 6min 39s\n",
"Wall time: 55.3 s\n"
"CPU times: user 7min 10s, sys: 733 ms, total: 7min 11s\n",
"Wall time: 59 s\n"
]
},
{
Expand All @@ -1279,7 +1289,7 @@
"HistGradientBoostingRegressor(max_bins=128, max_iter=1000, random_state=0)"
]
},
"execution_count": 7,
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -1304,7 +1314,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 11,
"id": "2377f1f9-63b7-4784-9c44-1d253fc1a396",
"metadata": {},
"outputs": [],
Expand All @@ -1315,10 +1325,60 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 21,
"id": "f54479a8-9de4-4f72-93fa-be4735a94033",
"metadata": {},
"outputs": [],
"source": [
"test = pd.read_parquet(f\"{data_folder}/h3/test_dataframe.parquet\")"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "f2e3b877-f5cf-4ede-991d-d03d52772292",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([8.06233449, 8.00722043, 8.23016915, 8.05892568, 8.0068 ,\n",
" 8.11586171, 8.2015727 , 8.11319305, 8.093583 , 8.04384344,\n",
" 8.04673512, 8.0698688 , 8.40464618, 8.0390008 , 8.02034675,\n",
" 8.01925916, 8.09890871, 8.04424289, 8.16721198, 8.22857131,\n",
" 8.20609517, 8.13370174, 8.21208393, 8.2750854 , 8.32720329,\n",
" 8.27140094, 8.30552463, 8.24340398, 8.29624915, 8.18316912,\n",
" 8.24425409, 8.16948669, 8.29027264, 8.30210798, 8.23456201,\n",
" 8.25204357, 8.33126324, 8.32372631, 8.15944809, 8.32383247,\n",
" 8.11205762, 8.23732133, 8.26973505, 8.32003781, 8.30502667,\n",
" 8.27283892, 8.24362349, 8.23026649, 8.35689258, 8.1517334 ,\n",
" 8.31643866, 8.22854347, 8.28398349, 8.34436748, 8.34505042,\n",
" 8.28033006, 8.21545101, 8.30032823, 8.39351619, 8.35949784,\n",
" 8.3247135 , 8.29561425, 8.31600062, 8.31454537, 8.37800727,\n",
" 8.27300226, 8.35663346, 8.43808543, 8.40056998, 8.39488489,\n",
" 8.33007259, 8.23171775, 8.36576051, 8.4129574 , 8.43281291,\n",
" 8.39264688, 8.35920223, 8.37525182, 8.29730092, 8.25082708,\n",
" 8.35995364, 8.40641258, 8.40075452, 8.3591094 , 8.39779213,\n",
" 8.39053811, 8.31131077, 8.366623 , 8.3663535 , 8.38307759,\n",
" 8.0235025 , 7.97413075, 7.95448047, 8.00362844, 7.9896409 ,\n",
" 7.97478846, 7.95226829, 7.9323486 , 7.92122621, 8.04491905])"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"regressor_final.predict(test)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ade3d39e-f91e-459c-8b89-eb29c87cacd8",
"metadata": {},
"outputs": [],
"source": []
}
],
Expand Down
67 changes: 67 additions & 0 deletions code/04_generalisation/prototype.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,27 @@
"from r5py import TransportNetwork, TravelTimeMatrixComputer, TransportMode"
]
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'2023.1.0'"
]
},
"execution_count": 94,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import sklearn\n",
"xr.__version__"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -166,6 +187,26 @@
"grid = gpd.read_parquet(f\"{data_folder}/h3/grid_complete.parquet\")"
]
},
{
"cell_type": "code",
"execution_count": 95,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1382771, 59)"
]
},
"execution_count": 95,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grid.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -1160,6 +1201,32 @@
" json.dump(registry, f)"
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['engine/default_data.parquet',\n",
" 'engine/hashes.json',\n",
" 'engine/empty.parquet',\n",
" 'engine/oa_key.parquet',\n",
" 'engine/oa_area.parquet',\n",
" 'engine/matrix.parquet',\n",
" 'engine/accessibility.joblib']"
]
},
"execution_count": 99,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(glob(f\"engine/*\"))"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down

0 comments on commit 0cb4ac3

Please sign in to comment.