From 573213bbb75f49e7f6465d18fdde206c327a640e Mon Sep 17 00:00:00 2001 From: Jens Reimann Date: Thu, 19 Sep 2024 09:33:03 +0200 Subject: [PATCH] fix: ensure UUIDs are stable, and we have a stable insertion order --- .../ingestor/src/graph/sbom/common/license.rs | 65 ++++++++++++++++--- modules/ingestor/tests/parallel.rs | 61 +++++++++++++++-- 2 files changed, 113 insertions(+), 13 deletions(-) diff --git a/modules/ingestor/src/graph/sbom/common/license.rs b/modules/ingestor/src/graph/sbom/common/license.rs index 8ab28cc2..f71d0c59 100644 --- a/modules/ingestor/src/graph/sbom/common/license.rs +++ b/modules/ingestor/src/graph/sbom/common/license.rs @@ -1,19 +1,17 @@ -use sea_orm::ActiveValue::Set; -use sea_orm::{ConnectionTrait, DbErr, EntityTrait}; +use sea_orm::{ActiveValue::Set, ConnectionTrait, DbErr, EntityTrait}; use sea_query::OnConflict; -use std::collections::HashMap; -use trustify_entity::license; -use uuid::Uuid; - use spdx_expression::SpdxExpression; +use std::collections::{BTreeMap, HashMap}; use tracing::instrument; use trustify_common::db::chunk::EntityChunkedIter; +use trustify_entity::license; +use uuid::Uuid; const NAMESPACE: Uuid = Uuid::from_bytes([ 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe, 0x41, 0x18, 0xa1, 0x38, 0xb8, 0x9f, 0x19, 0x35, 0xe0, 0xa7, ]); -#[derive(Default, Debug, Clone)] +#[derive(Default, Debug, Clone, PartialEq, Eq)] pub struct LicenseInfo { pub license: String, pub refs: HashMap, @@ -24,7 +22,9 @@ impl LicenseInfo { let mut text = self.license.clone(); for (user_ref, user_license) in &self.refs { - text = text.replace(user_ref, user_license); + if &text == user_ref { + text = user_license.clone(); + } } // UUID based upon a hash of the lowercase de-ref'd license. @@ -55,7 +55,11 @@ impl LicenseInfo { #[derive(Default, Debug)] pub struct LicenseCreator { - licenses: HashMap, + /// The licenses to create. + /// + /// Uses a [`BTreeMap`] to ensure we have a stable insertion order, avoiding deadlocks on the + /// database. + licenses: BTreeMap, } impl LicenseCreator { @@ -110,3 +114,46 @@ impl LicenseCreator { Ok(()) } } + +#[cfg(test)] +mod test { + use crate::graph::sbom::LicenseInfo; + use std::collections::HashMap; + + #[test] + fn stable_uuid() { + // create a new license, ensure a new random state of the hashmap + let license = || LicenseInfo { + license: "LicenseRef-1-2-3".to_string(), + refs: { + let mut refs = HashMap::new(); + refs.insert("LicenseRef".to_string(), "CyberNeko License".to_string()); + refs.insert( + "LicenseRef-1".to_string(), + "CyberNeko License 1".to_string(), + ); + refs.insert( + "LicenseRef-1-2".to_string(), + "CyberNeko License 1-2".to_string(), + ); + refs.insert( + "LicenseRef-1-2-3".to_string(), + "CyberNeko License 1-2-3".to_string(), + ); + refs + }, + }; + + // the original one we compare to + let original = license(); + + for _ in 0..10 { + // a new one, containing the same information + let new = license(); + // should be equal + assert_eq!(original, new); + // and generate the same UUID + assert_eq!(original.uuid(), new.uuid()); + } + } +} diff --git a/modules/ingestor/tests/parallel.rs b/modules/ingestor/tests/parallel.rs index 887a4c4d..5085e656 100644 --- a/modules/ingestor/tests/parallel.rs +++ b/modules/ingestor/tests/parallel.rs @@ -3,13 +3,17 @@ use csaf::Csaf; use serde_json::Value; use spdx_rs::models::SPDX; -use std::str::FromStr; +use std::{collections::HashMap, str::FromStr}; use test_context::{futures, test_context}; use test_log::test; use tracing::instrument; use trustify_common::{cpe::Cpe, purl::Purl}; use trustify_module_ingestor::{ - graph::{cpe::CpeCreator, purl::creator::PurlCreator, sbom::spdx::parse_spdx}, + graph::{ + cpe::CpeCreator, + purl::creator::PurlCreator, + sbom::{spdx::parse_spdx, LicenseCreator, LicenseInfo}, + }, service::{Discard, Format}, }; use trustify_test_context::{document_bytes, spdx::fix_spdx_rels, TrustifyContext}; @@ -143,7 +147,7 @@ async fn csaf_parallel(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { #[instrument] #[test(tokio::test(flavor = "multi_thread", worker_threads = 4))] #[ignore = "Enable once the PURL database structure has been refactored"] -async fn purl_parallel(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { +async fn purl_creator(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { const NUM: usize = 25; const ITEMS: usize = 25; @@ -187,7 +191,7 @@ async fn purl_parallel(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { #[test_context(TrustifyContext)] #[instrument] #[test(tokio::test(flavor = "multi_thread", worker_threads = 4))] -async fn cpe_parallel(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { +async fn cpe_creator(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { const NUM: usize = 25; const ITEMS: usize = 25; @@ -222,3 +226,52 @@ async fn cpe_parallel(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { Ok(()) } + +/// Ingest x * y licenses in parallel +#[test_context(TrustifyContext)] +#[instrument] +#[test(tokio::test(flavor = "multi_thread", worker_threads = 4))] +async fn license_creator(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + const NUM: usize = 25; + const ITEMS: usize = 100; + + let mut tasks = vec![]; + + for _ in 0..NUM { + let db = ctx.db.clone(); + tasks.push(async move { + let mut creator = LicenseCreator::new(); + + for i in 0..ITEMS { + creator.add(&LicenseInfo { + license: format!("FOO-{i}"), + refs: { + let mut refs = HashMap::new(); + refs.insert(format!("FOO-{i}"), "License Text {i}".to_string()); + refs + }, + }); + creator.add(&LicenseInfo { + license: format!("BAR-{i}"), + refs: Default::default(), + }); + } + + creator.create(&db).await?; + + Ok::<_, anyhow::Error>(()) + }); + } + + // progress ingestion tasks + + let result = futures::future::join_all(tasks).await; + + // now test + + assert_all_ok(NUM, result); + + // done + + Ok(()) +}