Skip to content

Commit

Permalink
Add initial Dmel and GH actions
Browse files Browse the repository at this point in the history
  • Loading branch information
jogoodma committed May 20, 2024
1 parent 06151e7 commit f9cf133
Show file tree
Hide file tree
Showing 7 changed files with 185 additions and 45 deletions.
59 changes: 59 additions & 0 deletions .github/workflows/generate-blast-conf.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Manually triggered workflow that regenerates the FlyBase BLAST configuration
# for a given release and opens a pull request with the result.
name: Generate FlyBase BLAST Configuration
on:
  workflow_dispatch:
    inputs:
      FB-release:
        description: 'The FlyBase release version'
        required: true
      dmel-annot-release:
        description: 'The Dmel annotation release version e.g. r6.57'
        required: true

# The "Create PR" step pushes a branch and opens a pull request, so the job
# token needs write access to both contents and pull requests.
permissions:
  contents: write
  pull-requests: write

jobs:
  generate:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository code
        uses: actions/checkout@v4

      - name: Set up python
        id: setup-python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install Poetry
        uses: snok/install-poetry@v1
        with:
          virtualenvs-create: true
          virtualenvs-in-project: true
          installer-parallel: true

      #----------------------------------------------
      # load cached venv if cache exists
      #----------------------------------------------
      - name: Load cached venv
        id: cached-poetry-dependencies
        uses: actions/cache@v3
        with:
          path: .venv
          key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}

      #----------------------------------------------
      # install dependencies if cache does not exist
      #----------------------------------------------
      - name: Install dependencies
        if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
        run: poetry install --no-interaction --no-root

      # Inputs are passed through environment variables and quoted rather than
      # interpolated into the script text, so a value containing shell
      # metacharacters or spaces cannot alter the command that is executed.
      - name: Generate configuration
        env:
          FB_RELEASE: ${{ github.event.inputs.FB-release }}
          DMEL_ANNOT_RELEASE: ${{ github.event.inputs.dmel-annot-release }}
        run: poetry run generate --release "$FB_RELEASE" --dmel-annot-release "$DMEL_ANNOT_RELEASE"

      - name: Create PR
        uses: peter-evans/create-pull-request@v6
        with:
          commit-message: ${{ github.event.inputs.FB-release }} BLAST config
          branch: ${{ github.event.inputs.FB-release }}-blast-config
          delete-branch: true
          title: '[Update] ${{ github.event.inputs.FB-release }} BLAST config'
          body: |
            ${{ github.event.inputs.FB-release }} BLAST config
          draft: false
27 changes: 27 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# .pre-commit-config.yaml
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
repos:
  # Generic repository hygiene checks from the upstream pre-commit hook collection.
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.5.0
    hooks:
      - id: check-case-conflict
      - id: check-merge-conflict
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-yaml
      - id: check-toml
      - id: check-json
      - id: check-added-large-files
  # Local hooks: the formatters are invoked through `poetry run` with
  # `language: system`, so they come from the project's Poetry environment
  # (black and isort are declared as dev dependencies in pyproject.toml)
  # instead of an environment managed by pre-commit.
  - repo: local
    hooks:
      - id: black
        name: black
        entry: poetry run black
        language: system
        types: [file, python]
      - id: isort
        name: isort
        entry: poetry run isort
        language: system
        types: [file, python]
Empty file added conf/.gitkeep
Empty file.
16 changes: 15 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ tqdm = "^4.66.4"

[tool.poetry.group.dev.dependencies]
black = "^24.4.2"
isort = "^5.13.2"

[build-system]
requires = ["poetry-core"]
Expand Down
18 changes: 11 additions & 7 deletions src/blast_db_configuration/__main__.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
import json
import logging
from dataclasses import dataclass
from typing_extensions import Annotated
from datetime import datetime
from pathlib import Path
import json
import logging

import agr_blast_service_configuration.schemas.metadata as agrdb
import typer
from tqdm import tqdm
from Bio import Entrez
import agr_blast_service_configuration.schemas.metadata as agrdb
from tqdm import tqdm
from typing_extensions import Annotated

from .db_metadata import create_metadata_from_ncbi
from .db_metadata import create_dmel_metadata, create_metadata_from_ncbi

app = typer.Typer()

Expand Down Expand Up @@ -44,6 +45,9 @@ class DefaultBlastDbConfiguration:
@app.command()
def generate_config(
release: Annotated[str, typer.Option(help="The FlyBase release version")],
dmel_annot_release: Annotated[
str, typer.Option(help="The Dmel annotation release version e.g. r6.57")
],
contact: Annotated[
type(DEFAULT_CONFIG.contact),
typer.Option(help="Email of the FlyBase technical contact."),
Expand Down Expand Up @@ -99,7 +103,7 @@ def generate_config(
unit="organism",
):
if genus == "Drosophila" and species == "melanogaster":
pass
all_dbs.extend(create_dmel_metadata(dmel_annot_release))
else:
all_dbs.extend(create_metadata_from_ncbi(genus, species, ncbi_email))

Expand Down
109 changes: 72 additions & 37 deletions src/blast_db_configuration/db_metadata.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import logging
import urllib.request
from typing import Optional

import agr_blast_service_configuration.schemas.metadata as blast_metadata_schema

from .ncbi import taxonomy as tax
from .ncbi import genomes as genomes
from .ncbi import taxonomy as tax

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -72,39 +74,72 @@ def create_metadata_from_ncbi(
return dbs


def create_dmel_metadata():
    """
    Placeholder for building the Dmel BLAST DB metadata.

    Takes no arguments and does nothing; the commented-out draft below is
    retained for reference only.
    """
    pass
    # NOTE(review): the draft below references `dbs`, `options` and
    # `blast_metadata`, none of which are defined in this function, and it
    # hard-codes the md5sums (see its own TODOs).
    # dbs.extend(
    # [
    # blast_metadata.BlastDBMetaData(
    # version=options.dmel_annot,
    # URI=f"ftp://ftp.flybase.org/genomes/Drosophila_melanogaster/dmel_r{options.dmel_annot}_{options.release}/fasta/dmel-all-chromosome-r{options.dmel_annot}.fasta.gz",
    # md5sum="b7bc17acfd655914c68326df8599a9ca", # TODO - Hard coded for now, need to fetch this from the MD5SUM file
    # genus="Drosophila",
    # species="melanogaster",
    # blast_title=f"D. melanogaster Genome Assembly ({options.dmel_annot})",
    # description="Drosophila melanogaster genome assembly",
    # taxon_id="NCBITaxon:7227",
    # seqtype="nucl",
    # ),
    # blast_metadata.BlastDBMetaData(
    # version=options.dmel_annot,
    # URI=f"ftp://ftp.flybase.org/genomes/Drosophila_melanogaster/dmel_r{options.dmel_annot}_{options.release}/fasta/dmel-all-translation-r{options.dmel_annot}.fasta.gz",
    # # TODO - Hard coded for now, need to fetch this from the MD5SUM file
    # md5sum="e3f959ab0e1026de56e1bd00490450e5",
    # genus="Drosophila",
    # species="melanogaster",
    # blast_title=f"D. melanogaster Protein Sequences ({options.dmel_annot})",
    # description="Drosophila melanogaster protein sequences",
    # taxon_id="NCBITaxon:7227",
    # seqtype="prot",
    # ),
    # ]
    # )
    # flybase_blast_metadata = blast_metadata.AGRBlastDatabases(
    # metaData=blast_metadata.AGRBlastMetadata(
    # contact=options.email, dataProvider="FlyBase", release=options.release
    # ),
    # data=dbs,
    # )
    # print(flybase_blast_metadata.json())
def create_dmel_metadata(
    dmel_annot_release: str,
) -> list[blast_metadata_schema.SequenceMetadata]:
    """
    Build the BLAST DB metadata entries for the D. melanogaster FASTA files.

    One entry is produced per FlyBase BLAST FASTA file (assembly, intergenic,
    transcript, translation, transposon), in that order. The annotation
    release is used as each entry's version and is appended to its title.

    :param dmel_annot_release: The Dmel annot release
    :return: List of BLAST DB metadata schemas
    """
    nucleotide = blast_metadata_schema.BlastDBType.NUCL
    protein = blast_metadata_schema.BlastDBType.PROT

    # One row per database: (FASTA URI, human readable title, sequence type).
    db_specs = (
        (
            "https://ftp.flybase.org/blast/dmel-assembly.fasta.gz",
            f"D. melanogaster Genome Assembly {dmel_annot_release}",
            nucleotide,
        ),
        (
            "https://ftp.flybase.org/blast/dmel-intergenic.fasta.gz",
            f"D. melanogaster Intergenic Regions {dmel_annot_release}",
            nucleotide,
        ),
        (
            "https://ftp.flybase.org/blast/dmel-transcript.fasta.gz",
            f"D. melanogaster Transcripts {dmel_annot_release}",
            nucleotide,
        ),
        (
            "https://ftp.flybase.org/blast/dmel-translation.fasta.gz",
            f"D. melanogaster Proteins {dmel_annot_release}",
            protein,
        ),
        (
            "https://ftp.flybase.org/blast/dmel-transposon.fasta.gz",
            f"D. melanogaster Transposons {dmel_annot_release}",
            nucleotide,
        ),
    )

    # TODO: read in checksums and assign them to the appropriate DB.
    # Until then every entry carries the literal placeholder "MD5".
    metadata = []
    for fasta_uri, title, sequence_type in db_specs:
        entry = blast_metadata_schema.SequenceMetadata(
            version=dmel_annot_release,
            uri=fasta_uri,
            md5_sum="MD5",
            genus="Drosophila",
            species="melanogaster",
            # The same text serves as both the BLAST title and the description.
            blast_title=title,
            description=title,
            taxon_id="7227",
            seqtype=sequence_type,
        )
        metadata.append(entry)
    return metadata


def fetch_dmel_checksums(uri: str, timeout: float = 30.0) -> Optional[str]:
    """
    Get the current Dmel FASTA checksums.

    :param uri: The URI of the checksum file
    :param timeout: Seconds to wait on a blocking network operation before giving up
    :return: The text content of the checksum file, or None if it could not
        be retrieved
    """
    try:
        with urllib.request.urlopen(uri, timeout=timeout) as response:
            return response.read().decode("utf-8")
    except OSError as err:
        # URLError, HTTPError and socket timeouts are all OSError subclasses;
        # report the failure and honour the Optional return contract instead
        # of propagating a transport error to the caller.
        logging.getLogger(__name__).warning(
            "Unable to fetch Dmel checksums from %s: %s", uri, err
        )
        return None

0 comments on commit f9cf133

Please sign in to comment.