Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
c2ec4ac
Document convergent limitation on intrinsics
Flakebi Mar 24, 2026
208d6af
Merge ref '0e95a0f4c677' from rust-lang/rust
invalid-email-address Mar 31, 2026
cd2f366
Auto merge of #154924 - nnethercote:rm-impl-HashStable-for-Attr-slice…
bors Apr 20, 2026
76f6332
Auto merge of #155491 - ohadravid:faster-storage-in-copyprop-and-gvn,…
bors Apr 21, 2026
7c9d071
Auto merge of #155343 - dianqk:indirect-by-ref, r=nikic
bors Apr 22, 2026
6535809
Auto merge of #154596 - alexcrichton:update-wasi-sdk, r=jieyouxu
bors Apr 23, 2026
6cde854
Auto merge of #155709 - tgross35:compiler-builtins-sync-2026-04-22, r…
bors Apr 24, 2026
d895205
Rollup merge of #149637 - Flakebi:fix-convergent-mir-opts, r=nnethercote
JonathanBrouwer May 1, 2026
4e31351
Rollup merge of #154971 - fmease:enum-var-verify-enum-seg, r=BoxyUwU
JonathanBrouwer May 1, 2026
156ebca
Rollup merge of #155186 - cijiugechu:fix/loop-match-no-self-assign, r…
JonathanBrouwer May 1, 2026
0eff892
Rollup merge of #155948 - SynapLink:fix/pub-visibility-order, r=petro…
JonathanBrouwer May 1, 2026
8c1674f
Rollup merge of #156001 - Human9000-bit:ssa-range-prop-155836, r=dianqk
JonathanBrouwer May 1, 2026
2ef2b86
Rollup merge of #155600 - CrooseGit:dev/reucru01/adds-polonius-ui-tes…
JonathanBrouwer May 1, 2026
7de9663
Rollup merge of #155995 - Mrmaxmeier:debuginfo-embed-external-source,…
JonathanBrouwer May 1, 2026
c21d285
Rollup merge of #156019 - nnethercote:rm-Feed, r=oli-obk
JonathanBrouwer May 1, 2026
b671a51
Rollup merge of #156031 - nnethercote:lex_token_trees, r=chenyukang
JonathanBrouwer May 1, 2026
d1a295a
Auto merge of #156041 - JonathanBrouwer:rollup-yc0vOVk, r=JonathanBro…
bors May 1, 2026
23c2454
Auto merge of #153968 - jyn514:jyn/linker-warn-by-default, r=mati865
bors May 1, 2026
fee325d
Auto merge of #152277 - gurry:114865-ice-format-args, r=chenyukang
bors May 3, 2026
65061a8
Auto merge of #155996 - flip1995:clippy-subtree-update, r=Manishearth
bors May 3, 2026
4ba4f07
Prepare for merging from rust-lang/rust
invalid-email-address May 4, 2026
5f20a7c
Merge ref '045b17737dab' from rust-lang/rust
invalid-email-address May 4, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions crates/core_arch/src/amdgpu/intrinsic_is_convergent.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
This intrinsic does not behave like a normal function call; it is a "[convergent]" operation and as such has non-standard control-flow effects which need special treatment by the language.
Rust currently does not properly support convergent operations.
This operation is hence provided on a best-effort basis.
Using it may result in incorrect code under some circumstances.

[convergent]: https://llvm.org/docs/ConvergentOperations.html
72 changes: 72 additions & 0 deletions crates/core_arch/src/amdgpu/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,8 @@ pub fn wavefrontsize() -> u32 {
/// Synchronize all wavefronts in a workgroup.
///
/// Each wavefront in a workgroup waits at the barrier until all wavefronts in the workgroup have arrived at the barrier.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub fn s_barrier() {
Expand All @@ -253,6 +255,8 @@ pub fn s_barrier() {
/// Signal a specific barrier type.
///
/// Only for non-named barriers.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub unsafe fn s_barrier_signal<const BARRIER_TYPE: i32>() {
Expand All @@ -265,6 +269,8 @@ pub unsafe fn s_barrier_signal<const BARRIER_TYPE: i32>() {
/// Provides access to the s_barrier_signal_first instruction;
/// additionally ensures that the result value is valid even when
/// the intrinsic is used from a wavefront that is not running in a workgroup.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub unsafe fn s_barrier_signal_isfirst<const BARRIER_TYPE: i32>() -> bool {
Expand All @@ -274,6 +280,8 @@ pub unsafe fn s_barrier_signal_isfirst<const BARRIER_TYPE: i32>() -> bool {
/// Wait for a specific barrier type.
///
/// Only for non-named barriers.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub unsafe fn s_barrier_wait<const BARRIER_TYPE: i16>() {
Expand All @@ -283,6 +291,8 @@ pub unsafe fn s_barrier_wait<const BARRIER_TYPE: i16>() {
/// Get the state of a specific barrier type.
///
/// The `barrier_type` argument must be uniform, otherwise behavior is undefined.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub unsafe fn s_get_barrier_state<const BARRIER_TYPE: i32>() -> u32 {
Expand All @@ -292,6 +302,8 @@ pub unsafe fn s_get_barrier_state<const BARRIER_TYPE: i32>() -> u32 {
/// A barrier for only the threads within the current wavefront.
///
/// Does not result in an instruction but restricts the compiler.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub fn wave_barrier() {
Expand All @@ -315,6 +327,8 @@ pub fn wave_barrier() {
/// - 0x0100: All DS read instructions may be scheduled across `sched_barrier`.
/// - 0x0200: All DS write instructions may be scheduled across `sched_barrier`.
/// - 0x0400: All Transcendental (e.g. V_EXP) instructions may be scheduled across `sched_barrier`.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub unsafe fn sched_barrier<const MASK: u32>() {
Expand Down Expand Up @@ -345,6 +359,8 @@ pub unsafe fn sched_barrier<const MASK: u32>() {
/// // 5 MFMA
/// sched_group_barrier::<8, 5, 0>()
/// ```
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub unsafe fn sched_group_barrier<const MASK: u32, const SIZE: u32, const SYNC_ID: u32>() {
Expand All @@ -366,6 +382,8 @@ pub fn s_sleep<const COUNT: u32>() {
/// Stop execution of the kernel.
///
/// This usually signals an error state.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub fn s_sethalt<const VALUE: u32>() -> ! {
Expand Down Expand Up @@ -407,6 +425,8 @@ pub fn mbcnt_hi(value: u32, init: u32) -> u32 {

/// Returns a bitfield (`u32` or `u64`) containing the result of its i1 argument
/// in all active lanes, and zero in all inactive lanes.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub fn ballot(b: bool) -> u64 {
Expand All @@ -419,6 +439,8 @@ pub fn ballot(b: bool) -> u64 {
/// While [`ballot`] converts a `bool` to a mask, `inverse_ballot` converts a mask back to a `bool`.
/// This means `inverse_ballot(ballot(b)) == b`.
/// The reverse does not always hold: `ballot(inverse_ballot(value))` may differ from `value`, because `ballot` sets inactive lanes to zero.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub fn inverse_ballot(value: u64) -> bool {
Expand All @@ -433,6 +455,8 @@ pub fn inverse_ballot(value: u64) -> bool {
/// - 2: DPP
///
/// If the target does not support DPP operations (e.g. gfx6/7), the reduction is performed using the default iterative strategy.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub fn wave_reduce_umin<const STRATEGY: u32>(value: u32) -> u32 {
Expand All @@ -447,6 +471,8 @@ pub fn wave_reduce_umin<const STRATEGY: u32>(value: u32) -> u32 {
/// - 2: DPP
///
/// If the target does not support DPP operations (e.g. gfx6/7), the reduction is performed using the default iterative strategy.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub fn wave_reduce_min<const STRATEGY: u32>(value: i32) -> i32 {
Expand All @@ -462,6 +488,8 @@ pub fn wave_reduce_min<const STRATEGY: u32>(value: i32) -> i32 {
/// - 2: DPP
///
/// If the target does not support DPP operations (e.g. gfx6/7), the reduction is performed using the default iterative strategy.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub fn wave_reduce_umax<const STRATEGY: u32>(value: u32) -> u32 {
Expand All @@ -476,6 +504,8 @@ pub fn wave_reduce_umax<const STRATEGY: u32>(value: u32) -> u32 {
/// - 2: DPP
///
/// If the target does not support DPP operations (e.g. gfx6/7), the reduction is performed using the default iterative strategy.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub fn wave_reduce_max<const STRATEGY: u32>(value: i32) -> i32 {
Expand All @@ -491,6 +521,8 @@ pub fn wave_reduce_max<const STRATEGY: u32>(value: i32) -> i32 {
/// - 2: DPP
///
/// If the target does not support DPP operations (e.g. gfx6/7), the reduction is performed using the default iterative strategy.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub fn wave_reduce_add<const STRATEGY: u32>(value: u32) -> u32 {
Expand All @@ -506,6 +538,8 @@ pub fn wave_reduce_add<const STRATEGY: u32>(value: u32) -> u32 {
/// - 2: DPP
///
/// If the target does not support DPP operations (e.g. gfx6/7), the reduction is performed using the default iterative strategy.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub fn wave_reduce_and<const STRATEGY: u32>(value: u32) -> u32 {
Expand All @@ -520,6 +554,8 @@ pub fn wave_reduce_and<const STRATEGY: u32>(value: u32) -> u32 {
/// - 2: DPP
///
/// If the target does not support DPP operations (e.g. gfx6/7), the reduction is performed using the default iterative strategy.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub fn wave_reduce_or<const STRATEGY: u32>(value: u32) -> u32 {
Expand All @@ -534,6 +570,8 @@ pub fn wave_reduce_or<const STRATEGY: u32>(value: u32) -> u32 {
/// - 2: DPP
///
/// If the target does not support DPP operations (e.g. gfx6/7), the reduction is performed using the default iterative strategy.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub fn wave_reduce_xor<const STRATEGY: u32>(value: u32) -> u32 {
Expand All @@ -544,12 +582,16 @@ pub fn wave_reduce_xor<const STRATEGY: u32>(value: u32) -> u32 {
// The following intrinsics can have multiple sizes

/// Get `value` from the first active lane in the wavefront.
///
/// This is a thin wrapper: `value` is forwarded unchanged to
/// `llvm_readfirstlane_u32` and that call's `u32` result is returned as-is.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub fn readfirstlane_u32(value: u32) -> u32 {
llvm_readfirstlane_u32(value)
}
/// Get `value` from the first active lane in the wavefront.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub fn readfirstlane_u64(value: u64) -> u64 {
Expand All @@ -559,6 +601,8 @@ pub fn readfirstlane_u64(value: u64) -> u64 {
///
/// The lane argument must be uniform across the currently active threads
/// of the current wavefront. Otherwise, the result is undefined.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub unsafe fn readlane_u32(value: u32, lane: u32) -> u32 {
Expand All @@ -568,6 +612,8 @@ pub unsafe fn readlane_u32(value: u32, lane: u32) -> u32 {
///
/// The lane argument must be uniform across the currently active threads
/// of the current wavefront. Otherwise, the result is undefined.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub unsafe fn readlane_u64(value: u64, lane: u32) -> u64 {
Expand All @@ -582,6 +628,8 @@ pub unsafe fn readlane_u64(value: u64, lane: u32) -> u64 {
///
/// `value` is the value returned by `lane`.
/// `default` is the value returned by all lanes other than `lane`.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub unsafe fn writelane_u32(value: u32, lane: u32, default: u32) -> u32 {
Expand All @@ -596,6 +644,8 @@ pub unsafe fn writelane_u32(value: u32, lane: u32, default: u32) -> u32 {
///
/// `value` is the value returned by `lane`.
/// `default` is the value returned by all lanes other than `lane`.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub unsafe fn writelane_u64(value: u64, lane: u32, default: u64) -> u64 {
Expand All @@ -605,6 +655,8 @@ pub unsafe fn writelane_u64(value: u64, lane: u32, default: u64) -> u64 {
/// Stop execution of the wavefront.
///
/// This usually signals the end of a successful execution.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub fn endpgm() -> ! {
Expand All @@ -621,6 +673,8 @@ pub fn endpgm() -> ! {
/// v_mov_b32 <dest> <old>
/// v_mov_b32 <dest> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
/// ```
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub unsafe fn update_dpp<
Expand Down Expand Up @@ -651,6 +705,8 @@ pub fn s_memrealtime() -> u64 {
///
/// Reading from inactive lanes returns `0`.
/// In case multiple values get written to the same `lane`, the value from the source lane with the higher index is taken.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub unsafe fn ds_permute(lane: u32, value: u32) -> u32 {
Expand All @@ -661,6 +717,8 @@ pub unsafe fn ds_permute(lane: u32, value: u32) -> u32 {
/// Returns the `value` given to `ds_permute` by lane `lane`.
///
/// Reading from inactive lanes returns `0`.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub unsafe fn ds_bpermute(lane: u32, value: u32) -> u32 {
Expand All @@ -680,6 +738,8 @@ pub unsafe fn perm(src0: u32, src1: u32, selector: u32) -> u32 {
///
/// The third and fourth inputs must be uniform across the current wavefront.
/// These are combined into a single 64-bit value representing lane selects used to swizzle within each row.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub unsafe fn permlane16_u32<const FI: bool, const BOUND_CONTROL: bool>(
Expand All @@ -696,6 +756,8 @@ pub unsafe fn permlane16_u32<const FI: bool, const BOUND_CONTROL: bool>(
///
/// The third and fourth inputs must be uniform across the current wavefront.
/// These are combined into a single 64-bit value representing lane selects used to swizzle within each row.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub unsafe fn permlanex16_u32<const FI: bool, const BOUND_CONTROL: bool>(
Expand All @@ -718,6 +780,8 @@ pub fn s_get_waveid_in_workgroup() -> u32 {
/// Swap `value` between upper and lower 32 lanes in a wavefront.
///
/// Does nothing for wave32.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub unsafe fn permlane64_u32(value: u32) -> u32 {
Expand All @@ -728,6 +792,8 @@ pub unsafe fn permlane64_u32(value: u32) -> u32 {
/// Performs arbitrary gather-style operation within a row (16 contiguous lanes) of the second input operand.
///
/// In contrast to [`permlane16_u32`], allows each lane to specify its own gather lane.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub unsafe fn permlane16_var<const FI: bool, const BOUND_CONTROL: bool>(
Expand All @@ -742,6 +808,8 @@ pub unsafe fn permlane16_var<const FI: bool, const BOUND_CONTROL: bool>(
/// Performs arbitrary gather-style operation across two rows (16 contiguous lanes) of the second input operand.
///
/// In contrast to [`permlanex16_u32`], allows each lane to specify its own gather lane.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub unsafe fn permlanex16_var<const FI: bool, const BOUND_CONTROL: bool>(
Expand All @@ -766,6 +834,8 @@ pub fn wave_id() -> u32 {
/// Odd rows of the first operand are swapped with even rows of the second operand (one row is 16 lanes).
/// Returns a pair for the swapped registers.
/// The first element of the return corresponds to the swapped element of the first argument.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub unsafe fn permlane16_swap<const FI: bool, const BOUND_CONTROL: bool>(
Expand All @@ -782,6 +852,8 @@ pub unsafe fn permlane16_swap<const FI: bool, const BOUND_CONTROL: bool>(
/// Rows 2 and 3 of the first operand are swapped with rows 0 and 1 of the second operand (one row is 16 lanes).
/// Returns a pair for the swapped registers.
/// The first element of the return corresponds to the swapped element of the first argument.
///
#[doc = include_str!("intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
pub unsafe fn permlane32_swap<const FI: bool, const BOUND_CONTROL: bool>(
Expand Down
2 changes: 2 additions & 0 deletions crates/core_arch/src/nvptx/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ unsafe extern "C" {
}

/// Synchronizes all threads in the block.
///
#[doc = include_str!("../amdgpu/intrinsic_is_convergent.md")]
#[inline]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn _syncthreads() -> () {
Expand Down
2 changes: 1 addition & 1 deletion rust-version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
e22c616e4e87914135c1db261a03e0437255335e
045b17737dab5fcc28e4cbee0cfe2ce4ed363b32