diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 588940e1..82687161 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -49,7 +49,7 @@ All commits shall pass the lint pipeline of the following tools:
 - Mypy (see [pyproject.toml](https://github.com/crocs-muni/sec-certs/blob/main/pyproject.toml) for settings)
 - Ruff (see [pyproject.toml](https://github.com/crocs-muni/sec-certs/blob/main/pyproject.toml) for settings)
 
-These tools can be installed via [dev_requirements.txt](https://github.com/crocs-muni/sec-certs/blob/main/dev_requirements.txt) You can use [pre-commit](https://pre-commit.com/) tool to register git hook that will evalute these checks prior to any commit and abort the commit for you. Note that the pre-commit is not meant to automatically fix the issues, just warn you.
+These tools can be installed via [dev_requirements.txt](https://github.com/crocs-muni/sec-certs/blob/main/requirements/dev_requirements.txt). You can use the [pre-commit](https://pre-commit.com/) tool to register a git hook that will evaluate these checks prior to any commit and abort the commit for you. Note that pre-commit is not meant to automatically fix the issues, just warn you.
 
 It should thus suffice to:
diff --git a/docs/Makefile b/docs/Makefile
index d4bb2cbb..a005bfc7 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -14,6 +14,9 @@ help:
 
 .PHONY: help Makefile
 
+linkcheck:
+	@$(SPHINXBUILD) -b linkcheck . $(BUILDDIR)/linkcheck
+
 # Catch-all target: route all unknown targets to Sphinx using the new
 # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
 %: Makefile
diff --git a/docs/user_guide.md b/docs/user_guide.md
index 3eb8320e..a94b60c8 100644
--- a/docs/user_guide.md
+++ b/docs/user_guide.md
@@ -21,3 +21,14 @@
 nvd_api_key: null # or the actual key value
 ```
 If you aim to fetch the sources from NVD, we advise you to get an [NVD API key](https://nvd.nist.gov/developers/request-an-api-key) and set the `nvd_api_key` setting accordingly. The download from NVD will work even without API key, it will just be slow. No API key is needed when `preferred_source_nvd_datasets: "sec-certs"`
+
+
+## Inferring inter-certificate reference context
+
+```{important}
+This is an experimental feature.
+```
+
+We provide a model that can predict the context of inter-certificate references based on the text embedded in the artifacts. The model output is not incorporated into the `CCCertificate` instances, but it can be dumped into a `.csv` file from which it can be correlated with a DataFrame of certificate features.
+
+To train and deploy the model, it should be sufficient to change some paths and run the [prediction notebook](https://github.com/crocs-muni/sec-certs/blob/main/notebooks/cc/reference_annotations/prediction.ipynb). The output of this notebook is a `prediction.csv` file that can be loaded into the [references notebook](https://github.com/crocs-muni/sec-certs/blob/main/notebooks/cc/references.ipynb). That notebook documents the full analysis of references conducted on the Common Criteria certificates. Among other things, it generates further `.csv` files that can subsequently be plotted via the [plotting notebook](https://github.com/crocs-muni/sec-certs/blob/main/notebooks/cc/paper2_plots.ipynb).
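The user guide section added above says the model output can be dumped into a `.csv` file and correlated with a DataFrame of certificate features. A minimal sketch of that correlation step, assuming hypothetical column names (`dgst` for the certificate digest and `label` for the predicted reference context; the actual schema is whatever the prediction notebook emits):

```python
import pandas as pd

# Load the predictions dumped by the prediction notebook. The "dgst" and
# "label" column names are assumptions for illustration, not a documented schema.
predictions = pd.read_csv("prediction.csv")

# A hypothetical DataFrame of certificate features, keyed by digest.
features = pd.DataFrame(
    {
        "dgst": ["abc123", "def456"],
        "category": ["ICs and Smart Cards", "Network Devices"],
    }
)

# Correlate predicted reference contexts with certificate features.
merged = features.merge(predictions, on="dgst", how="left")
print(merged.head())
```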
diff --git a/pyproject.toml b/pyproject.toml
index 401a0ae9..ae19a5c0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -102,7 +102,7 @@ sec-certs = "sec_certs.cli:main"
 
 [tool.ruff]
-select = [
+lint.select = [
     "I", # isort
     "E", # pycodestyle
     "W", # pycodestyle
@@ -113,14 +113,14 @@
     "C4", # comprehensions
     "SIM",
 ]
-ignore = [
+lint.ignore = [
     "E501", # line-length, should be handled by ruff format
 ]
 src = ["src", "tests"]
 line-length = 120
 target-version = "py310"
 
-[tool.ruff.mccabe]
+[tool.ruff.lint.mccabe]
 max-complexity = 10
 
 [tool.setuptools.package-data]
diff --git a/src/sec_certs/sample/cpe.py b/src/sec_certs/sample/cpe.py
index 5e9e8b65..e8ec29eb 100644
--- a/src/sec_certs/sample/cpe.py
+++ b/src/sec_certs/sample/cpe.py
@@ -32,16 +32,16 @@ def __lt__(self, other: CPEMatchCriteria) -> bool:
 
     @classmethod
     def from_nist_dict(cls, dct: dict[str, Any]) -> CPEMatchCriteria:
-        if dct.get("versionStartIncluding", None):
+        if dct.get("versionStartIncluding"):
             version_start = ("including", dct["versionStartIncluding"])
         elif dct.get("versionStartExcluding"):
             version_start = ("excluding", dct["versionStartExcluding"])
         else:
             version_start = None
 
-        if dct.get("versionEndIncluding", None):
+        if dct.get("versionEndIncluding"):
             version_end = ("including", dct["versionEndIncluding"])
-        elif dct.get("versionEndExcluding", None):
+        elif dct.get("versionEndExcluding"):
             version_end = ("excluding", dct["versionEndExcluding"])
         else:
             version_end = None
diff --git a/src/sec_certs/sample/fips.py b/src/sec_certs/sample/fips.py
index 8e9268cf..26b9833e 100644
--- a/src/sec_certs/sample/fips.py
+++ b/src/sec_certs/sample/fips.py
@@ -53,7 +53,7 @@ def get_web_data_and_algorithms(self) -> tuple[set[str], FIPSCertificate.WebData
     def _build_details_dict(self, details_div: Tag) -> dict[str, Any]:
         def parse_single_detail_entry(key, entry):
             normalized_key = DETAILS_KEY_NORMALIZATION_DICT[key]
-            normalization_func = DETAILS_KEY_TO_NORMALIZATION_FUNCTION.get(normalized_key, None)
+            normalization_func = DETAILS_KEY_TO_NORMALIZATION_FUNCTION.get(normalized_key)
             normalized_entry = (
                 FIPSHTMLParser.normalize_string(entry.text) if not normalization_func else normalization_func(entry)
             )
diff --git a/src/sec_certs/utils/extract.py b/src/sec_certs/utils/extract.py
index 669cbcfc..340ad568 100644
--- a/src/sec_certs/utils/extract.py
+++ b/src/sec_certs/utils/extract.py
@@ -727,8 +727,7 @@ def load_text_file(
     whole_text = ""
     whole_text_with_newlines = ""
 
-    lines_included = 0
-    for line in lines:
+    for lines_included, line in enumerate(lines):
         if limit_max_lines != -1 and lines_included >= limit_max_lines:
             break
 
@@ -736,7 +735,6 @@
         line = line.replace("\n", "")
         whole_text += line
         whole_text += line_separator
-        lines_included += 1
 
     return whole_text, whole_text_with_newlines, was_unicode_decode_error
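The Python changes above (in `from_nist_dict`, `_build_details_dict`, and `load_text_file`) are behavior-preserving cleanups: `dict.get(key)` already defaults to `None`, so the explicit second argument is redundant, and a manually incremented loop counter can be replaced by `enumerate()`. A standalone sketch (not repository code) demonstrating that both forms are equivalent:

```python
# dict.get() defaults to None, so the explicit second argument is redundant.
dct = {"versionStartIncluding": "1.0"}
assert dct.get("versionEndIncluding") == dct.get("versionEndIncluding", None)

# enumerate() yields (index, item) pairs; the index equals the number of
# items consumed before the current one, which matches the old
# lines_included counter incremented at the end of each iteration.
lines = ["first\n", "second\n", "third\n"]
limit_max_lines = 2

manual = []
lines_included = 0
for line in lines:
    if limit_max_lines != -1 and lines_included >= limit_max_lines:
        break
    manual.append(line)
    lines_included += 1

idiomatic = []
for lines_included, line in enumerate(lines):
    if limit_max_lines != -1 and lines_included >= limit_max_lines:
        break
    idiomatic.append(line)

assert manual == idiomatic  # both stop after limit_max_lines lines
```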