diff --git a/Common/utils.py b/Common/utils.py index b2c39ab..037d6a9 100644 --- a/Common/utils.py +++ b/Common/utils.py @@ -426,40 +426,6 @@ def get_foodb_files(self, full_url: str, data_dir: str, data_file_name: str, fil # return the list return file_count, foodb_dir, name[0] - @staticmethod - def format_normalization_failures(data_set_name: str, node_norm_failures: list, edge_norm_failures: list): - """ - outputs the nodes/edges that failed normalization - - :param data_set_name: the name of the data source that produced these results - :param node_norm_failures: set of node curies - :param edge_norm_failures: set of edge predicates - :return: - """ - the_logger = LoggingUtil.init_logging(f"ORION.Common.NormFailures.{data_set_name}", level=logging.INFO, line_format='medium', log_file_path=os.path.join(Path(__file__).parents[1], 'logs')) - - # get the list into a dataframe group - df = pd.DataFrame(node_norm_failures, columns=['curie']) - df_node_grp = df.groupby('curie').size() \ - .reset_index(name='count') \ - .sort_values('count', ascending=False) - - # iterate through the groups and create the edge records. - for row_index, row in df_node_grp.iterrows(): - the_logger.info(f'{row["curie"]}\t{data_set_name}') - # self.logger.info(f'Failed node CURIE: {row["curie"]}, count: {row["count"]}') - - # get the list into a dataframe group - df = pd.DataFrame(edge_norm_failures, columns=['curie']) - df_edge_grp = df.groupby('curie').size() \ - .reset_index(name='count') \ - .sort_values('count', ascending=False) - - # iterate through the groups and create the edge records. - for row_index, row in df_edge_grp.iterrows(): - the_logger.info(f'{row["curie"]}\t{data_set_name}') - # self.logger.info(f'Failed edge predicate: {row["curie"]}, count: {row["count"]}') - @staticmethod def split_file(archive_file_path: str, output_dir: str, data_file_name: str, lines_per_file: int = 500000) -> list: """ diff --git a/parsers/IntAct/src/loadIA.py b/parsers/IntAct/src/loadIA.py index bc47aa5..6ebaf07 100644 --- a/parsers/IntAct/src/loadIA.py +++ b/parsers/IntAct/src/loadIA.py @@ -1,14 +1,13 @@ import os import argparse import enum -import logging import re from io import TextIOWrapper from csv import reader from operator import itemgetter from zipfile import ZipFile -from Common.utils import LoggingUtil, GetData +from Common.utils import GetData from Common.loader_interface import SourceDataLoader, SourceDataFailedError from Common.prefixes import NCBITAXON, UNIPROTKB from Common.kgxmodel import kgxnode, kgxedge diff --git a/parsers/hmdb/src/loadHMDB.py b/parsers/hmdb/src/loadHMDB.py index bd317e1..acfe104 100644 --- a/parsers/hmdb/src/loadHMDB.py +++ b/parsers/hmdb/src/loadHMDB.py @@ -41,9 +41,6 @@ def __init__(self, test_mode: bool = False, source_data_dir: str = None): self.data_url = 'https://hmdb.ca/system/downloads/current/' self.source_db: str = 'Human Metabolome Database' - # create a logger - self.logger = LoggingUtil.init_logging("ORION.HMDB.HMDBLoader", level=logging.INFO, line_format='medium', log_file_path=os.environ['ORION_LOGS']) - def get_latest_source_version(self) -> str: """ gets the version of the data diff --git a/parsers/panther/src/loadPanther.py b/parsers/panther/src/loadPanther.py index 426475f..e535218 100644 --- a/parsers/panther/src/loadPanther.py +++ b/parsers/panther/src/loadPanther.py @@ -1,13 +1,12 @@ import os import csv import argparse -import logging import re import requests from bs4 import BeautifulSoup -from Common.utils import LoggingUtil, GetData +from Common.utils import GetData from Common.loader_interface import SourceDataLoader from Common.kgxmodel import kgxnode, kgxedge from functools import partial @@ -67,9 +66,6 @@ def __init__(self, test_mode: bool = False, source_data_dir: str = None): self.__gene_family_data__ = None - # create a logger - self.logger = LoggingUtil.init_logging("ORION.Panther.PLoader", level=logging.INFO, line_format='medium', log_file_path=os.environ['ORION_LOGS']) - def get_latest_source_version(self) -> str: if self.data_version: diff --git a/set_up_test_env.sh b/set_up_test_env.sh index b3cffff..39d305c 100644 --- a/set_up_test_env.sh +++ b/set_up_test_env.sh @@ -7,7 +7,7 @@ mkdir -p "$PWD/../ORION_storage" export ORION_STORAGE="$PWD/../ORION_storage/" #ORION_GRAPHS - a directory for storing knowledge graphs -#mkdir -p "$PWD/../ORION_graphs" +mkdir -p "$PWD/../ORION_graphs" export ORION_GRAPHS="$PWD/../ORION_graphs/" #ORION_LOGS - a directory for storing logs