Skip to content

Commit

Permalink
Merge pull request #201 from RobokopU24/logging_cleanup
Browse files (browse the repository at this point in the history)
Logging cleanup
  • Loading branch information
beasleyjonm committed Feb 19, 2024
2 parents 7e5c208 + 71a6936 commit bfdc5c4
Show file tree
Hide file tree
Showing 5 changed files with 3 additions and 45 deletions.
34 changes: 0 additions & 34 deletions Common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,40 +426,6 @@ def get_foodb_files(self, full_url: str, data_dir: str, data_file_name: str, fil
# return the list
return file_count, foodb_dir, name[0]

@staticmethod
def format_normalization_failures(data_set_name: str, node_norm_failures: list, edge_norm_failures: list):
    """
    Logs the nodes/edges that failed normalization.

    Each distinct value is written once as "<value>\\t<data_set_name>", most
    frequently occurring values first. All node failures are written before
    any edge failures. The occurrence count is used for ordering only and is
    not written to the log.

    :param data_set_name: the name of the data source that produced these results
    :param node_norm_failures: list of node curies (duplicates are expected and are collapsed)
    :param edge_norm_failures: list of edge predicates (duplicates are expected and are collapsed)
    :return: None
    """
    # dedicated logger per data set; the log location is the 'logs' directory that sits
    # beside this file's parent directory.
    # NOTE(review): the exact handlers/format come from LoggingUtil.init_logging - confirm there.
    the_logger = LoggingUtil.init_logging(f"ORION.Common.NormFailures.{data_set_name}", level=logging.INFO, line_format='medium', log_file_path=os.path.join(Path(__file__).parents[1], 'logs'))

    # nodes and edges get identical treatment, so run both through one pipeline
    for failures in (node_norm_failures, edge_norm_failures):
        # collapse duplicates into one row per value with its occurrence count,
        # ordered from most to least frequent
        df = pd.DataFrame(failures, columns=['curie'])
        df_grp = df.groupby('curie').size() \
            .reset_index(name='count') \
            .sort_values('count', ascending=False)

        # only the value itself is logged, so walk the column directly rather than
        # building a Series per row with iterrows()
        for curie in df_grp['curie']:
            the_logger.info(f'{curie}\t{data_set_name}')

@staticmethod
def split_file(archive_file_path: str, output_dir: str, data_file_name: str, lines_per_file: int = 500000) -> list:
"""
Expand Down
3 changes: 1 addition & 2 deletions parsers/IntAct/src/loadIA.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
import os
import argparse
import enum
import logging
import re

from io import TextIOWrapper
from csv import reader
from operator import itemgetter
from zipfile import ZipFile
from Common.utils import LoggingUtil, GetData
from Common.utils import GetData
from Common.loader_interface import SourceDataLoader, SourceDataFailedError
from Common.prefixes import NCBITAXON, UNIPROTKB
from Common.kgxmodel import kgxnode, kgxedge
Expand Down
3 changes: 0 additions & 3 deletions parsers/hmdb/src/loadHMDB.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,6 @@ def __init__(self, test_mode: bool = False, source_data_dir: str = None):
self.data_url = 'https://hmdb.ca/system/downloads/current/'
self.source_db: str = 'Human Metabolome Database'

# create a logger
self.logger = LoggingUtil.init_logging("ORION.HMDB.HMDBLoader", level=logging.INFO, line_format='medium', log_file_path=os.environ['ORION_LOGS'])

def get_latest_source_version(self) -> str:
"""
gets the version of the data
Expand Down
6 changes: 1 addition & 5 deletions parsers/panther/src/loadPanther.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import os
import csv
import argparse
import logging
import re

import requests

from bs4 import BeautifulSoup
from Common.utils import LoggingUtil, GetData
from Common.utils import GetData
from Common.loader_interface import SourceDataLoader
from Common.kgxmodel import kgxnode, kgxedge
from functools import partial
Expand Down Expand Up @@ -67,9 +66,6 @@ def __init__(self, test_mode: bool = False, source_data_dir: str = None):

self.__gene_family_data__ = None

# create a logger
self.logger = LoggingUtil.init_logging("ORION.Panther.PLoader", level=logging.INFO, line_format='medium', log_file_path=os.environ['ORION_LOGS'])

def get_latest_source_version(self) -> str:

if self.data_version:
Expand Down
2 changes: 1 addition & 1 deletion set_up_test_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ mkdir -p "$PWD/../ORION_storage"
export ORION_STORAGE="$PWD/../ORION_storage/"

#ORION_GRAPHS - a directory for storing knowledge graphs
#mkdir -p "$PWD/../ORION_graphs"
mkdir -p "$PWD/../ORION_graphs"
export ORION_GRAPHS="$PWD/../ORION_graphs/"

#ORION_LOGS - a directory for storing logs
Expand Down

0 comments on commit bfdc5c4

Please sign in to comment.