helxplatform · YaphetKG · Aug 7, 2024 · May 13, 2024 · May 13, 2024 · Jun 6, 2024
diff --git a/dags/roger/config/__init__.py b/dags/roger/config/__init__.py
@@ -99,6 +99,21 @@ class AnnotationConfig(DictLike):
             "sapbert": {
                 "classification_url": "https://med-nemo.apps.renci.org/annotate/",
                 "annotator_url": "https://babel-sapbert.apps.renci.org/annotate/",
+                "score_threshold": 0.8,
+                "bagel": {
+                    "enabled": False,
+                    "url": "https://bagel.apps.renci.org/group_synonyms_openai",
+                    "prompt": "bagel/ask_classes",
+                    "llm_args": {
+                        "llm_model_name": "gpt-4o-2024-05-13",
+                        "organization": "",
+                        "access_key": "",
+                        "llm_model_args": {
+                            "top_p": 0,
+                            "temperature": 0.1
+                        }
+                    }
+                }
             },
         }
     )

diff --git a/dags/roger/config/config.yaml b/dags/roger/config/config.yaml
@@ -1,6 +1,6 @@
 redisgraph:
   username: ""
-  password: "12345"
+  password: "weak"
   host: localhost
   graph: test
   port: 6379
@@ -42,13 +42,25 @@ bulk_loader:
 
 annotation:
   clear_http_cache: false
-  annotator_type: monarch
+  annotator_type: sapbert
   annotator_args:
     monarch:
       url: "https://api.monarchinitiative.org/api/nlp/annotate/entities?min_length=4&longest_only=false&include_abbreviation=false&include_acronym=false&include_numbers=false&content="
     sapbert: 
       classification_url: "https://med-nemo.apps.renci.org/annotate/"
-      annotator_url: "https://babel-sapbert.apps.renci.org/annotate/"
+      annotator_url: "https://sap-qdrant.apps.renci.org/annotate/"
+      score_threshold: 0.5
+      bagel:
+        enabled: false
+        url: "http://localhost:9099/group_synonyms_openai"
+        prompt: "bagel/ask_classes"
+        llm_args:
+          llm_model_name: "gpt-4o-2024-05-13"
+          organization:
+          access_key:
+          llm_model_args:
+            top_p: 0
+            temperature: 0.1
   normalizer: "https://nodenormalization-dev.apps.renci.org/get_normalized_nodes?conflate=false&description=true&curie="
   synonym_service: "https://name-resolution-sri.renci.org/reverse_lookup"
   ontology_metadata: "https://api.monarchinitiative.org/api/bioentity/"
@@ -93,9 +105,9 @@ indexing:
         action: "files"
 
 elasticsearch:
-  host: elasticsearch
+  host: localhost
   username: elastic
-  password: ""
+  password: "12345"
   nboost_host: ""
   scheme: "http"
   ca_path: ""

diff --git a/dags/roger/pipelines/bdc_pipelines.py b/dags/roger/pipelines/bdc_pipelines.py
@@ -0,0 +1,50 @@
+"Dug pipeline for dbGaP data set"
+
+from roger.pipelines import DugPipeline
+
+class BIOLINCCdbGaPPipeline(DugPipeline):
+    "Pipeline for the dbGaP data set"
+    pipeline_name = 'biolincc'
+    parser_name = 'biolincc'
+
+
+class covid19dbGaPPipeline(DugPipeline):
+    "Pipeline for the dbGaP data set"
+    pipeline_name = 'covid19-dbgap'
+    parser_name = 'covid19'
+
+class dirDbGaPPipeline(DugPipeline):
+    pipeline_name = "dir-dbgap"
+    parser_name = "dir"
+
+class LungMapDbGaPPipeline(DugPipeline):
+    pipeline_name = "lungmap-dbgap"
+    parser_name = "lungmap"
+
+class nsrrDbGaPPipeline(DugPipeline):
+    pipeline_name = "nsrr-dbgap"
+    parser_name = "nsrr"
+
+class ParentDbGaPPipeline(DugPipeline):
+    pipeline_name = "parent-dbgap"
+    parser_name = "parent"
+
+class PCGCDbGaPPipeline(DugPipeline):
+    pipeline_name = "pcgc-dbgap"
+    parser_name = "pcgc"
+
+class RecoverDbGaPPipeline(DugPipeline):
+    pipeline_name = "recover-dbgap"
+    parser_name = "recover"
+
+class TopmedDBGaPPipeline(DugPipeline):
+    pipeline_name = "topmed-gen3-dbgap"
+    parser_name = "topmeddbgap"
+
+class CureSCPipeline(DugPipeline):
+    pipeline_name = "curesc-dbgap"
+    parser_name = "curesc"
+
+class SmallDataDbGap(DugPipeline):
+    pipeline_name = "small-data-dbgap"
+    parser_name = "topmeddbgap"
diff --git a/requirements.txt b/requirements.txt
@@ -6,7 +6,7 @@ jsonpickle
 redisgraph-bulk-loader==0.12.3
 pytest
 PyYAML
-git+https://github.com/helxplatform/dug@2.13.1
+git+https://github.com/helxplatform/dug@develop
 orjson
 kg-utils==0.0.6
 bmt==1.1.0