Skip to content

Commit

Permalink
Release notes (#74)
Browse files Browse the repository at this point in the history
* Added release notes, updated docs
* Updated embedder example notebooks (added example model in 'downloads')
* add LOCAL flag to the embedder
* Bugfix in Neo4j, added add_nodes_from_df and add_edges_from_df to PandasPGFrame
  • Loading branch information
eugeniashurko committed Jun 4, 2021
1 parent 8defd3b commit 906609c
Show file tree
Hide file tree
Showing 10 changed files with 324 additions and 163 deletions.
7 changes: 7 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,13 @@ BlueGraph supports Python versions >= 3.7 and pip >= 21.0.1. To update pip from
pip install --upgrade pip wheel setuptools


The stable version of BlueGraph can be installed from PyPI using:

::
pip install bluegraph


The development version of BlueGraph can be installed from the source by cloning the current repository as follows:

::
Expand Down
8 changes: 5 additions & 3 deletions bluegraph/backends/neo4j/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def _generate_property_repr(properties, prop_types=None):
# create a string property
quote = "'"
props.append("{}: {}{}{}".format(
k, quote,
k.replace(".", "_"), quote,
str(preprocess_value(v)).replace("'", "\\'"), quote))
elif isinstance(v, Iterable):
# create a list property
Expand All @@ -85,10 +85,12 @@ def _generate_property_repr(properties, prop_types=None):
else:
values.append("'{}'".format(preprocess_value(vv)))
if len(values) > 0:
props.append("{}: [{}]".format(k, ", ".join(values)))
props.append("{}: [{}]".format(
k.replace(".", "_"), ", ".join(values)))
elif prop_types[k] == "numeric" and not math.isnan(v):
# create a numerical property
props.append("{}: {}".format(k, preprocess_value(v)))
props.append("{}: {}".format(
k.replace(".", "_"), preprocess_value(v)))
return props


Expand Down
67 changes: 62 additions & 5 deletions bluegraph/core/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -654,14 +654,53 @@ def add_nodes(self, node_ids):
new_df = new_df.set_index("@id")
self._nodes = self._nodes.append(new_df)

def add_nodes_from_df(self, df, id_column, node_type=None,
                      property_types=None):
    """Append the rows of a dataframe to the PG frame as nodes.

    The column ``id_column`` is renamed to ``@id`` and becomes the index
    of the new rows; every remaining column is carried over as-is.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with one row per node. It is not modified: the renaming
        and re-indexing operate on a copy.
    id_column : str
        Name of the column of ``df`` holding the node identifiers.
    node_type : optional
        When truthy, written into the ``@type`` column of all new rows.
    property_types : dict, optional
        Mapping from column name to the property type passed to
        ``_set_node_prop_type``. Columns missing from the mapping are
        handed to ``node_prop_as_category``.
    """
    declared_types = {} if property_types is None else property_types

    renamed = df.rename(columns={id_column: "@id"})
    incoming = renamed.set_index("@id")
    if node_type:
        incoming["@type"] = node_type

    self._nodes = pd.concat([self._nodes, incoming])

    # NOTE(review): this loop also visits "@type" when that column is
    # present, so it is passed to the property-typing helpers too --
    # confirm this is intended.
    for column in incoming.columns:
        if column not in declared_types:
            self.node_prop_as_category(column)
            continue
        self._set_node_prop_type(column, declared_types[column])

def add_edges(self, edges):
    """Add edges, given as (source, target) pairs, to the PG frame.

    Parameters
    ----------
    edges : iterable
        Iterable of indexable items whose first element is the source
        node id and whose second element is the target node id. It is
        consumed in a single pass, so generators are accepted.
    """
    # Materialise once: iterating `edges` twice would silently drop the
    # targets when a one-shot iterator is passed.
    pairs = [(e[0], e[1]) for e in edges]
    sources = [source for source, _ in pairs]
    targets = [target for _, target in pairs]

    new_df = pd.DataFrame({"@source_id": sources, "@target_id": targets})
    new_df = new_df.set_index(["@source_id", "@target_id"])
    # `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the
    # equivalent and is what `add_edges_from_df` already uses.
    self._edges = pd.concat([self._edges, new_df])

def add_edges_from_df(self, df, source_column, target_column,
                      edge_type=None, property_types=None):
    """Append the rows of a dataframe to the PG frame as edges.

    The columns ``source_column`` and ``target_column`` are renamed to
    ``@source_id`` and ``@target_id`` and together become the index of
    the new rows; every remaining column is carried over as-is.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with one row per edge. It is not modified: the renaming
        and re-indexing operate on a copy.
    source_column : str
        Name of the column of ``df`` holding the source node ids.
    target_column : str
        Name of the column of ``df`` holding the target node ids.
    edge_type : optional
        When truthy, written into the ``@type`` column of all new rows.
    property_types : dict, optional
        Mapping from column name to the property type passed to
        ``_set_edge_prop_type``. Columns missing from the mapping are
        handed to ``edge_prop_as_category``.
    """
    declared_types = {} if property_types is None else property_types

    endpoint_names = {
        source_column: "@source_id",
        target_column: "@target_id",
    }
    renamed = df.rename(columns=endpoint_names)
    incoming = renamed.set_index(["@source_id", "@target_id"])
    if edge_type:
        incoming["@type"] = edge_type

    self._edges = pd.concat([self._edges, incoming])

    # NOTE(review): this loop also visits "@type" when that column is
    # present, so it is passed to the property-typing helpers too --
    # confirm this is intended.
    for column in incoming.columns:
        if column not in declared_types:
            self.edge_prop_as_category(column)
            continue
        self._set_edge_prop_type(column, declared_types[column])

def add_node_types(self, type_dict):
type_df = pd.DataFrame(
type_dict.items(), columns=["@id", "@type"])
Expand Down Expand Up @@ -1131,12 +1170,30 @@ def remove_isolated_nodes(self):
# Remove nodes
self._nodes = self._nodes.loc[~self._nodes.index.isin(isolates)]

def to_json(self):
def to_json(self, node_id_key=None, node_type_key=None,
edge_id_keys=None, edge_type_key=None):
"""Convert to a JSON dictionary."""
nodes_json = self._nodes.reset_index().to_dict(
orient="records")
edges_json = self._edges.reset_index().to_dict(
orient="records")
nodes = self._nodes.reset_index()
if node_id_key is not None:
nodes = nodes.rename(columns={"@id": node_id_key})
if node_type_key is not None and "@type" in nodes.columns:
nodes = nodes.rename(columns={"@type": node_type_key})
nodes_json = [
r.dropna().to_dict() for _, r in nodes.iterrows()
]

edges = self._edges.reset_index()
if edge_id_keys is not None:
edges = edges.rename(columns={
"@source_id": edge_id_keys[0],
"@target_id": edge_id_keys[1],
})
if edge_type_key is not None and "@type" in edges.columns:
edges = edges.rename(columns={"@type": edge_type_key})
edges_json = [
r.dropna().to_dict() for _, r in edges.iterrows()
]

return {
"nodes": nodes_json,
"edges": edges_json,
Expand Down
98 changes: 98 additions & 0 deletions docs/source/releases/v0.1.2.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
====================
v0.1.2 Release Notes
====================

This release includes some major bug-fixes, several new features and API changes described below.


Blue Graph's core
=================

PGFrame
-------

Updates to the :code:`PGFrame` interface include:

- Added methods:
- :code:`rename_node_properties` and :code:`rename_edge_properties` for changing property names;
- :code:`add_nodes_from_df` and :code:`add_edges_from_df` for adding nodes and edges using dataframes.
- Added the :code:`from_ontology` classmethod for importing (e.g. from Webprotege) ontologies as property graphs.
- Property values that are added to existing properties are now aggregated into sets (and not replaced as it was before).


Backend support
===============

graph-tool
----------

Fixed a major bug occurring during node merging.


Neo4j
-----

Several minor bugfixes of the Neo4j backend were included in this release. In addition, the interface of :code:`pgframe_to_neo4j` has changed:

- :code:`NaN` properties are skipped;
- Node types can be used as Neo4j node labels;
- Edge types can be used as Neo4j edge relationship types: edges with multiple types result in multiple Neo4j relationships with respective types and their properties replicated (this behaviour is implemented due to the fact that Neo4j relationships can have exactly one relationship type).


Graph preprocessing with BlueGraph
==================================


Semantic property encoding
--------------------------

Updates to the encoders:

- :code:`Word2VecModel` is renamed to :code:`Doc2VecEncoder` and now inherits from :code:`bluegraph.downstream.Preprocessor`;
- Wrapped scikit-learn's :code:`TfidfVectorizer` into :code:`TfIdfEncoder` inheriting :code:`bluegraph.downstream.Preprocessor`.

The above-mentioned changes allow using BlueGraph's encoders as a part of :code:`EmbeddingPipeline`.


Downstream tasks with BlueGraph
===============================


Similarity API
--------------

Similarity processor updates:

- Smarter handling of elements not existing in the index (when vectors or similar points are requested, :code:`None` is returned).
- Segmented Faiss index can be initialized without vectors; the model can then be trained on the first call to :code:`add`.



Embedding pipelines
--------------------

Embedding pipeline updates:

- Added basic prediction interface (the :code:`run_prediction` method);
- Modified :code:`get_similar_points` to be able to query similarity for the unknown vectors;
- Optimized embedding pipeline memory usage: embedding table is not stored explicitly, but is a part of Faiss index.


Services
========


Embedder
--------

Embedder is a mini-service for retrieving embedding vectors and similar points distributed as a part of BlueGraph. A detailed description of the API can be found `here <https://github.com/BlueBrain/BlueGraph/blob/master/services/embedder/api.yaml>`_. Two examples can be found in the `Embedder API for NCIt term embedding <https://github.com/BlueBrain/BlueGraph/blob/master/services/embedder/examples/notebooks/Embedder%20API%20for%20NCIt%20term%20embedding.ipynb>`_ notebook and `Embedder API for node embedding <https://github.com/BlueBrain/BlueGraph/blob/master/services/embedder/examples/notebooks/Embedder%20API%20for%20node%20embedding.ipynb>`_.

This release includes the following updates to the service:

- Embedder app can predict vectors for unseen points; the following formats can be passed as input:
* :code:`raw`: raw data as is
* :code:`json_pgframe`: a JSON representation of a PGFrame
* :code:`nexus_dataset`: endpoint, bucket, resource id and a Nexus token (in the request header), fetches the dataset by resource ID, downloads it and creates a PGFrame (dataset is a JSON representation of a PGFrame).
- API changes: the POST method for :code:`embedding/` and :code:`similar-points/` operates on unseen points;
- Dockerfile fix (smaller image size), dockerignore updates;
- Embedder app can fetch local models from the directory (specified in the configs).
1 change: 1 addition & 0 deletions docs/source/services_api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ Services API

This package contains a set of services distributed as a part of BlueGraph.


BlueBrainEmbedder
-----------------

Expand Down
6 changes: 3 additions & 3 deletions services/embedder/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,16 +129,16 @@ def _get_meta_data(model_name, file):
app.config.from_pyfile('configs/app_config.py')


try:
if app.config["LOCAL"] is False:
TOKEN = os.environ["NEXUS_TOKEN"]
app.forge = KnowledgeGraphForge(
app.config["FORGE_CONFIG"],
token=TOKEN)
except KeyError:
else:
app.forge = None

app.models = {}
_retrieve_models()
_retrieve_models(app.config["LOCAL"])

# --------------- Handlers ----------------

Expand Down
1 change: 1 addition & 0 deletions services/embedder/configs/app_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,6 @@
"""Embedding service configs."""
FORGE_CONFIG = "configs/forge_configs/forge-config.yml"
DOWNLOAD_DIR = "downloads/"
LOCAL = True

DEBUG = True
Binary file not shown.
Loading

0 comments on commit 906609c

Please sign in to comment.