Skip to content

Commit

Permalink
Merge branch '13-speedup' of github.com:mdavis-xyz/mms-monthly-cli into 13-speedup
Browse files (browse the repository at this point in the history)
  • Loading branch information
mdavis-xyz committed May 14, 2024
2 parents 6913339 + 7ffc87a commit 83f7f55
Showing 1 changed file with 13 additions and 6 deletions.
19 changes: 13 additions & 6 deletions mms_monthly_cli/mms_monthly.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@

import logging
import shutil
from functools import cache
from pathlib import Path
from re import match
from time import sleep
from typing import Dict, List, Union
from zipfile import BadZipFile, ZipFile
from time import sleep
from functools import cache

import requests
from bs4 import BeautifulSoup
Expand All @@ -32,24 +32,29 @@

# Data

MMSDM_ARCHIVE_URL = "https://www.nemweb.com.au/Data_Archive/Wholesale_Electricity/MMSDM/"
MMSDM_ARCHIVE_URL = (
"https://www.nemweb.com.au/Data_Archive/Wholesale_Electricity/MMSDM/"
)
"""Wholesale electricity data archive base URL"""

# Shared requests session, so that the TLS and HTTP connection is re-used
# across requests instead of being re-established each time (faster).
_session = requests.Session()
_session.headers.update({
_session.headers.update(
{
"User-Agent": generate_user_agent(),
"Accept": (
"text/html,application/xhtml+xml,application/xml;"
+ "q=0.9,image/avif,image/webp,*/*;q=0.8"
),
"Connection": "keep-alive",
"Upgrade-Insecure-Requests": "1",
})
}
)

# Functions to handle requests and scraped soup


def _request_content(url: str, additional_header: Dict = {}) -> requests.Response:
"""Initiates a GET request.
Expand All @@ -67,7 +72,7 @@ def _rerequest_to_obtain_soup(url: str, additional_header: Dict = {}) -> Beautif
Args:
url: URL for GET request.
Returns:
BeautifulSoup object with parsed HTML.
Expand Down Expand Up @@ -235,6 +240,7 @@ def _validate_data_dir(year: int, month: int, data_dir: str) -> None:

# Main functions to find available data, or to obtain data


@cache
def get_years_and_months() -> Dict[int, List[int]]:
"""Years and months with data on NEMWeb MMSDM Historical Data Archive
Expand Down Expand Up @@ -278,6 +284,7 @@ def _get_months(url: str) -> List[int]:
yearmonths[year] = months
return yearmonths


@cache
def get_available_tables(year: int, month: int, data_dir: str) -> List[str]:
"""Tables that can be requested from MMSDM Historical Data Archive for a
Expand Down

0 comments on commit 83f7f55

Please sign in to comment.