Skip to content

Commit ae34c34

Browse files
AMaini503Aayush Maini
andauthored
Unify Rust detection logic with SBOM mode, ownership mapping, and skip optimizations (#1474)
* Refactor parsing logic to separate classes * Fix RustCli UTs * Add skeleton for RustComponentDetector * Compute locations for registrations in SBOM * Add interface for rust cli parsing * Add support to use cached metadata in rust cli parser * Remove redundant normalization of the paths * Use ownership map in fallback mode as well * Copy new detector logic into existing RustSbomDetector * Check parentId in graph before adding parent child edge * Use normalized path in logs * Fix rust cli detector UTs * Extract all rust parsers into interfaces * Fix rust UTs * Add UTs for rust parsers * Respect DisableRustCli flag in RustMetadataContextBuilder * Add UTs for RustMetadataContextBuilder * Add UTs for RustSbomDetector * Add dependency edges in SBOM mode as well * Fix sbom parser UT * Add more UTs for RustSbomDetector * Add more UTs for Rust Parsers * CR: Use const string cargo file names, bump detector versions --------- Co-authored-by: Aayush Maini <[email protected]>
1 parent cf8fa35 commit ae34c34

19 files changed

+6634
-613
lines changed
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
namespace Microsoft.ComponentDetection.Detectors.Rust;
2+
3+
using System;
4+
using System.Collections.Generic;
5+
using System.Threading;
6+
using System.Threading.Tasks;
7+
using Microsoft.ComponentDetection.Detectors.Rust.Contracts;
8+
9+
/// <summary>
10+
/// Provides functionality to construct contextual metadata for Rust packages,
11+
/// specifically mapping package (crate) names to the <c>Cargo.toml</c> manifests
12+
/// that declare or reference them.
13+
/// </summary>
14+
public interface IRustMetadataContextBuilder
15+
{
16+
/// <summary>
17+
/// Builds a mapping of package (crate) names to the set of <c>Cargo.toml</c> files
18+
/// (supplied in dependency resolution order) that either declare or reference them,
19+
/// and returns additional ownership metadata. Also returns a cache of raw Cargo metadata
20+
/// per manifest so downstream detection code can avoid invoking <c>cargo metadata</c> again.
21+
/// </summary>
22+
/// <param name="orderedTomlPaths">
23+
/// An ordered enumeration of paths to <c>Cargo.toml</c> manifest files. The order should
24+
/// reflect dependency resolution (e.g. workspace root manifests first, followed by members).
25+
/// </param>
26+
/// <param name="cancellationToken">A token used to observe cancellation requests.</param>
27+
/// <returns>An <see cref="OwnershipResult"/> with ownership and per-manifest metadata cache.</returns>
28+
public Task<OwnershipResult> BuildPackageOwnershipMapAsync(
29+
IEnumerable<string> orderedTomlPaths,
30+
CancellationToken cancellationToken);
31+
32+
/// <summary>
33+
/// Represents the result of building Rust package ownership metadata.
/// Each collection property below starts out empty and is constructed with
/// <see cref="StringComparer.OrdinalIgnoreCase"/>, so its keys / elements compare
/// case-insensitively.
34+
/// </summary>
35+
public class OwnershipResult
36+
{
37+
/// <summary>
38+
/// Mapping from a package (crate) id to all <c>Cargo.toml</c> manifest
39+
/// paths that declare or reference that package.
40+
/// </summary>
41+
public Dictionary<string, HashSet<string>> PackageToTomls { get; set; }
42+
= new(StringComparer.OrdinalIgnoreCase);
43+
44+
/// <summary>
45+
/// Set of <c>Cargo.toml</c> manifest paths that declare local packages.
46+
/// </summary>
47+
public HashSet<string> LocalPackageManifests { get; set; }
48+
= new(StringComparer.OrdinalIgnoreCase);
49+
50+
/// <summary>
51+
/// Raw <c>cargo metadata</c> JSON (already parsed) per manifest path (normalized).
52+
/// This enables downstream detectors to reuse metadata without issuing
53+
/// another CLI call.
54+
/// </summary>
55+
public Dictionary<string, CargoMetadata> ManifestToMetadata { get; set; }
56+
= new(StringComparer.OrdinalIgnoreCase);
57+
58+
/// <summary>Set of manifest paths for which <c>cargo metadata</c> failed.</summary>
59+
public HashSet<string> FailedManifests { get; set; } = new(StringComparer.OrdinalIgnoreCase);
60+
}
61+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
namespace Microsoft.ComponentDetection.Detectors.Rust;
2+
3+
using System.Threading;
4+
using System.Threading.Tasks;
5+
using Microsoft.ComponentDetection.Contracts;
6+
7+
/// <summary>
/// Contract for an asynchronous parser that reads a component stream and is given a
/// single-file component recorder to work with.
/// NOTE(review): judging by the type name the stream is presumably a <c>Cargo.lock</c>
/// file; this interface itself does not state it — confirm against the implementation.
/// </summary>
public interface IRustCargoLockParser
8+
{
9+
/// <summary>
/// Asynchronously parses the supplied component stream.
/// </summary>
/// <param name="componentStream">The stream to parse.</param>
/// <param name="singleFileComponentRecorder">The per-file component recorder handed to the parser.</param>
/// <param name="cancellationToken">A token that can be used to cancel the operation.</param>
/// <returns>
/// A task producing a nullable integer. The meaning of the value (and of <c>null</c>) is not
/// defined by this interface. NOTE(review): possibly the lock file format version — confirm.
/// </returns>
public Task<int?> ParseAsync(
10+
IComponentStream componentStream,
11+
ISingleFileComponentRecorder singleFileComponentRecorder,
12+
CancellationToken cancellationToken);
13+
}
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
namespace Microsoft.ComponentDetection.Detectors.Rust;
2+
3+
using System.Collections.Generic;
4+
using System.Threading;
5+
using System.Threading.Tasks;
6+
using Microsoft.ComponentDetection.Contracts;
7+
using Microsoft.ComponentDetection.Detectors.Rust.Contracts;
8+
9+
/// <summary>
10+
/// Provides methods to parse Rust <c>Cargo.toml</c> / workspace dependency information into the component
11+
/// recording system. Implementations may choose to invoke the Rust CLI (e.g. <c>cargo metadata</c>) or
12+
/// operate on already-supplied serialized metadata.
13+
/// </summary>
14+
/// <remarks>
15+
/// There are two entry points:
16+
/// 1. <see cref="ParseAsync"/> triggers a fresh acquisition (typically by invoking the Cargo CLI).
17+
/// 2. <see cref="ParseFromMetadataAsync(IComponentStream, ISingleFileComponentRecorder, CargoMetadata, IComponentRecorder, IReadOnlyDictionary{string, HashSet{string}}, CancellationToken)"/> consumes a pre-fetched <see cref="CargoMetadata"/> blob
18+
/// and adds support for multi-file / workspace ownership resolution by leveraging a parent recorder
19+
/// and an ownership map.
20+
/// </remarks>
21+
public interface IRustCliParser
22+
{
23+
/// <summary>
24+
/// Parses Rust dependency information for the supplied component stream (generally a <c>Cargo.toml</c>)
25+
/// by invoking the Cargo CLI (e.g. running <c>cargo metadata</c>) and recording discovered components
26+
/// and dependency edges into the provided <paramref name="recorder"/>.
27+
/// </summary>
28+
/// <param name="componentStream">The stream representing the manifest file being parsed.</param>
29+
/// <param name="recorder">The per-file component recorder used to register detected components and graph edges.</param>
30+
/// <param name="cancellationToken">A token that can be used to cancel the parse operation.</param>
31+
/// <returns>
32+
/// A <see cref="ParseResult"/> indicating success or failure (with failure reason and any relevant local
33+
/// package directories that were resolved).
34+
/// </returns>
35+
public Task<ParseResult> ParseAsync(
36+
IComponentStream componentStream,
37+
ISingleFileComponentRecorder recorder,
38+
CancellationToken cancellationToken);
39+
40+
/// <summary>
41+
/// Parses Rust dependency information using a pre-obtained <see cref="CargoMetadata"/> object, with support
42+
/// for attributing discovered packages to owning manifests in a multi-project / workspace scenario.
43+
/// </summary>
44+
/// <param name="componentStream">The manifest stream being processed (used primarily for location context).</param>
45+
/// <param name="fallbackRecorder">
46+
/// A single-file recorder used if ownership cannot be resolved to a more specific recorder via the
47+
/// <paramref name="ownershipMap"/> or <paramref name="parentComponentRecorder"/>.
48+
/// </param>
49+
/// <param name="cachedMetadata">The pre-fetched Cargo metadata describing packages and their relationships.</param>
50+
/// <param name="parentComponentRecorder">
51+
/// The parent recorder that can produce (or correlate) other single-file recorders used to correctly
52+
/// attribute dependencies to their originating manifest locations.
53+
/// </param>
54+
/// <param name="ownershipMap">
55+
/// A mapping of package ID (or equivalent key) to a set of manifest file paths indicating ownership.
56+
/// Used to decide which recorder should own which package entries.
57+
/// </param>
58+
/// <param name="cancellationToken">A token to cancel the operation.</param>
59+
/// <returns>A <see cref="ParseResult"/> containing success state and any local package directories discovered.</returns>
60+
public Task<ParseResult> ParseFromMetadataAsync(
61+
IComponentStream componentStream,
62+
ISingleFileComponentRecorder fallbackRecorder,
63+
CargoMetadata cachedMetadata,
64+
IComponentRecorder parentComponentRecorder,
65+
IReadOnlyDictionary<string, HashSet<string>> ownershipMap,
66+
CancellationToken cancellationToken);
67+
68+
/// <summary>
69+
/// Result of parsing a Cargo.toml file.
70+
/// </summary>
71+
public class ParseResult
72+
{
73+
/// <summary>
74+
/// Gets or sets a value indicating whether parsing was successful.
75+
/// </summary>
76+
public bool Success { get; set; }
77+
78+
/// <summary>
79+
/// Gets or sets the error message if parsing failed.
80+
/// </summary>
81+
public string ErrorMessage { get; set; }
82+
83+
/// <summary>
84+
/// Gets or sets the reason for failure if parsing failed.
85+
/// </summary>
86+
public string FailureReason { get; set; }
87+
88+
/// <summary>
89+
/// Gets or sets the local package directories that should be marked as visited.
90+
/// This allows upstream client to skip TOMLs that were already accounted for in this run.
/// Initialized to an empty set.
91+
/// </summary>
92+
public HashSet<string> LocalPackageDirectories { get; set; } = [];
93+
}
94+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
namespace Microsoft.ComponentDetection.Detectors.Rust;
2+
3+
using System.Collections.Generic;
4+
using System.Threading;
5+
using System.Threading.Tasks;
6+
using Microsoft.ComponentDetection.Contracts;
7+
8+
/// <summary>
/// Contract for an asynchronous parser that reads a component stream and reports into
/// component recorders, either with a single recorder or with an additional parent
/// recorder and ownership map.
/// NOTE(review): judging by the type name the stream is presumably a Cargo SBOM file;
/// this interface itself does not state it — confirm against the implementation.
/// </summary>
public interface IRustSbomParser
9+
{
10+
/// <summary>
/// Asynchronously parses the supplied component stream using a single per-file recorder.
/// </summary>
/// <param name="componentStream">The stream to parse.</param>
/// <param name="recorder">The per-file component recorder handed to the parser.</param>
/// <param name="cancellationToken">A token that can be used to cancel the operation.</param>
/// <returns>
/// A task producing a nullable integer. The meaning of the value (and of <c>null</c>) is not
/// defined by this interface. NOTE(review): possibly a format version — confirm.
/// </returns>
public Task<int?> ParseAsync(
11+
IComponentStream componentStream,
12+
ISingleFileComponentRecorder recorder,
13+
CancellationToken cancellationToken);
14+
15+
/// <summary>
/// Asynchronously parses the supplied component stream, additionally receiving a parent
/// component recorder and an ownership map.
/// </summary>
/// <param name="componentStream">The stream to parse.</param>
/// <param name="sbomRecorder">The per-file component recorder associated with the stream being parsed.</param>
/// <param name="parentComponentRecorder">
/// The parent component recorder. NOTE(review): presumably used to obtain recorders for
/// owning manifests — confirm against the implementation.
/// </param>
/// <param name="ownershipMap">
/// A read-only map from a string key to a set of strings. NOTE(review): presumably package id
/// to owning <c>Cargo.toml</c> paths — confirm against the producer of the map.
/// </param>
/// <param name="cancellationToken">A token that can be used to cancel the operation.</param>
/// <returns>
/// A task producing a nullable integer. The meaning of the value (and of <c>null</c>) is not
/// defined by this interface. NOTE(review): possibly a format version — confirm.
/// </returns>
public Task<int?> ParseWithOwnershipAsync(
16+
IComponentStream componentStream,
17+
ISingleFileComponentRecorder sbomRecorder,
18+
IComponentRecorder parentComponentRecorder,
19+
IReadOnlyDictionary<string, HashSet<string>> ownershipMap,
20+
CancellationToken cancellationToken);
21+
}

0 commit comments

Comments
 (0)