Skip to content

Commit

Permalink
optional persistent caching of all AWS queries
Browse files Browse the repository at this point in the history
  • Loading branch information
daroczig committed Dec 29, 2023
1 parent edccc96 commit 287b430
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 17 deletions.
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,19 @@ Examples:

```py
from sc_crawler.vendors import aws

# enable persistent caching of AWS queries
from cachier import set_default_params
set_default_params(caching_enabled=True)

# fetch data
aws.get_all() # slow to query all instance types in all regions

# look around
aws.datacenters
aws.zones

# pretty printed objects
from rich import print as pp
pp(aws)
pp(aws._datacenters[1]._zones)
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ name = "sc-crawler"
version = "0.0.1"
requires-python = ">= 3.7"
dependencies = [
"cachier",
"pydantic",
"pydantic_extra_types",
"pycountry",
Expand Down
53 changes: 36 additions & 17 deletions sc_crawler/vendors/aws.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import boto3
from functools import cache
from cachier import cachier, set_default_params
from datetime import timedelta
from itertools import chain
import logging
import re
Expand All @@ -9,6 +10,38 @@

logger = logging.getLogger(__name__)

# Persistent caching of the AWS helpers is disabled by default at import time;
# callers opt in with `cachier.set_default_params(caching_enabled=True)`.
set_default_params(caching_enabled=False)

# ##############################################################################
# AWS cached helpers


@cachier(stale_after=timedelta(days=3))
def describe_instance_types(region):
    """List every EC2 instance type offered in an AWS region.

    ``DescribeInstanceTypes`` is a paginated API: a single call returns only
    the first page of results, so all pages are collected here.

    The result is cached by ``cachier`` and considered stale after 3 days.

    Args:
        region: AWS region name, passed to the EC2 client as ``region_name``.

    Returns:
        A list holding the ``InstanceTypes`` entries of all response pages
        (empty if no page contains any).
    """
    ec2 = boto3.client("ec2", region_name=region)
    paginator = ec2.get_paginator("describe_instance_types")
    return [
        instance_type
        for page in paginator.paginate()
        for instance_type in page.get("InstanceTypes", [])
    ]


@cachier(stale_after=timedelta(days=3))
def describe_regions():
    """Return the ``Regions`` list reported by EC2 ``DescribeRegions``.

    The EC2 client is created without an explicit region. An empty list is
    returned when the response carries no ``Regions`` key.

    The result is cached by ``cachier`` and considered stale after 3 days.
    """
    client = boto3.client("ec2")
    response = client.describe_regions()
    return response.get("Regions", [])


@cachier(stale_after=timedelta(days=3))
def describe_availability_zones(region):
    """Return the availability zones of an AWS region.

    Queries EC2 ``DescribeAvailabilityZones`` with ``AllAvailabilityZones=True``
    and a ``zone-type`` filter restricted to ``availability-zone``.

    The result is cached by ``cachier`` and considered stale after 3 days.

    Args:
        region: AWS region name, passed to the EC2 client as ``region_name``.

    Returns:
        The ``AvailabilityZones`` value of the response, or ``None`` if the
        response has no such key.
    """
    client = boto3.client("ec2", region_name=region)
    zone_type_filter = {"Name": "zone-type", "Values": ["availability-zone"]}
    response = client.describe_availability_zones(
        Filters=[zone_type_filter],
        AllAvailabilityZones=True,
    )
    return response.get("AvailabilityZones")


# ##############################################################################

def get_datacenters(vendor, *args, **kwargs):
"""List all available AWS datacenters.
Expand Down Expand Up @@ -272,8 +305,7 @@ def get_datacenters(vendor, *args, **kwargs):

# look for undocumented (new) datacenters in AWS
supported_regions = [d.identifier for d in datacenters]
ec2 = boto3.client("ec2")
regions = ec2.describe_regions().get("Regions", [])
regions = describe_regions()
for region in regions:
region_name = region.get("RegionName")
if "gov" in region_name:
Expand All @@ -293,14 +325,7 @@ def get_datacenters(vendor, *args, **kwargs):

# add zones
for datacenter in datacenters:
# need to create a new client in each AWS region
ec2 = boto3.client("ec2", region_name=datacenter.identifier)
zones = ec2.describe_availability_zones(
Filters=[
{"Name": "zone-type", "Values": ["availability-zone"]},
],
AllAvailabilityZones=True,
).get("AvailabilityZones")
zones = describe_availability_zones(datacenter.identifier)
datacenter._zones = {
zone.get("ZoneId"): Zone(
identifier=zone.get("ZoneId"),
Expand All @@ -313,12 +338,6 @@ def get_datacenters(vendor, *args, **kwargs):
return datacenters


@cache
def describe_instance_types(region):
    """List every EC2 instance type offered in an AWS region.

    ``DescribeInstanceTypes`` is a paginated API: a single call returns only
    the first page of results, so all pages are collected here.

    The result is memoized per ``region`` by ``functools.cache``.

    Args:
        region: AWS region name, passed to the EC2 client as ``region_name``.

    Returns:
        A list holding the ``InstanceTypes`` entries of all response pages
        (empty if no page contains any).
    """
    ec2 = boto3.client("ec2", region_name=region)
    paginator = ec2.get_paginator("describe_instance_types")
    return [
        instance_type
        for page in paginator.paginate()
        for instance_type in page.get("InstanceTypes", [])
    ]


instance_families = {
"c": "Compute optimized",
"d": "Dense storage",
Expand Down

0 comments on commit 287b430

Please sign in to comment.