; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
; RUN: opt -S -aa-pipeline= -passes='attributor,cgscc(openmp-opt-cgscc)' -openmp-opt-enable-merging  < %s | FileCheck %s --check-prefix=CHECK2
; #include <omp.h>
; void foo();
; void use(int);
; void usef(float);
; void merge(int a) {
; #pragma omp parallel
;     {
;         use(a);
;     }
; #pragma omp parallel
;     {
;         use(a);
;     }
; }
; void unmergable_proc_bind(int a) {
; #pragma omp parallel proc_bind(close)
;     {
;         use(a);
;     }
; #pragma omp parallel
;     {
;         use(a);
;     }
; }
; void unmergable_num_threads(int a) {
; #pragma omp parallel num_threads(a)
;     {
;         use(a);
;     }
; #pragma omp parallel
;     {
;         use(a);
;     }
; }
; void unmergable_seq_call(int a) {
; #pragma omp parallel
;     {
;         use(a);
;     }
;     foo();
; #pragma omp parallel
;     {
;         use(a);
;     }
; }
; void merge_seq(int a) {
; #pragma omp parallel
;     {
;         use(a);
;     }
;     a = a + 1;
; #pragma omp parallel
;     {
;         use(a);
;     }
;     use(a);
; }
; void merge_seq_float(float f, float *p) {
; #pragma omp parallel
;     {
;         use(f);
;     }
;     *p = f + 3.14f;
; #pragma omp parallel
;     {
;         use(f);
;     }
; }
; void merge_seq_firstprivate(int a) {
; #pragma omp parallel
;     {
;         use(a);
;     }
;     a = a + 1;
; #pragma omp parallel firstprivate(a)
;     {
;         use(a);
;     }
;     use(a);
; }
; void merge_seq_sink_lt(int a) {
; #pragma omp parallel
;     {
;         use(a);
;     }
;     {
;         int b = (int)&b;
;     }
; #pragma omp parallel
;     {
;         use(a);
;     }
; }
; void merge_seq_par_use(int a) {
; #pragma omp parallel
;     {
;         use(a);
;     }
;     int b = a + 1;
; #pragma omp parallel
;     {
;         use(a);
;         use(b);
;     }
; }
; void merge_cancellable_regions(int cancel1, int cancel2)
; {
; #pragma omp parallel
;     {
;         if(cancel1) {
; #pragma omp cancel parallel
;         }
;     }
; #pragma omp parallel
;     {
;         if (cancel2) {
; #pragma omp cancel parallel
;         }
;     }
; }
; void merge_cancellable_regions_seq(int cancel1, int cancel2)
; {
; #pragma omp parallel
;     {
;         if(cancel1) {
; #pragma omp cancel parallel
;         }
;     }
;     cancel2 = !cancel1;
; #pragma omp parallel
;     {
;         if (cancel2) {
; #pragma omp cancel parallel
;         }
;     }
; }
; void merge_3(int a) {
; #pragma omp parallel
;     {
;         use(a);
;     }
; #pragma omp parallel
;     {
;         use(a);
;     }
; #pragma omp parallel
;     {
;         use(a);
;     }
; }
; void merge_3_seq(int a, int b) {
; #pragma omp parallel
;     {
;         use(a);
;     }
;     b = a + 1;
; #pragma omp parallel
;     {
;         use(a);
;     }
;     b = b + a;
; #pragma omp parallel
;     {
;         use(a);
;     }
;     use(b);
; }
; void unmergable_3_seq_call(int a) {
; #pragma omp parallel
;     {
;         use(a);
;     }
;     foo();
; #pragma omp parallel
;     {
;         use(a);
;     }
;     foo();
; #pragma omp parallel
;     {
;         use(a);
;     }
; }
; void unmergable_3_proc_bind(int a) {
; #pragma omp parallel
;     {
;         use(a);
;     }
; #pragma omp parallel proc_bind(close)
;     {
;         use(a);
;     }
; #pragma omp parallel
;     {
;         use(a);
;     }
; }
; void unmergable_3_num_threads(int a) {
; #pragma omp parallel
;     {
;         use(a);
;     }
; #pragma omp parallel num_threads(a)
;     {
;         use(a);
;     }
; #pragma omp parallel
;     {
;         use(a);
;     }
; }
; void merge_2_unmergable_1(int a) {
; #pragma omp parallel
;     {
;         use(a);
;     }
; #pragma omp parallel
;     {
;         use(a);
;     }
;     foo();
; #pragma omp parallel
;     {
;         use(a);
;     }
; }
; Module-wide data layout string; the leading "e-m:e" selects little-endian
; byte order and ELF-style name mangling.
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"

; Layout of the source-location descriptor that the __kmpc_* calls below take
; as their first argument: four i32 fields followed by a pointer.
%struct.ident_t = type { i32, i32, i32, i32, ptr }

; @0 is the location string (only "unknown" placeholders and zeros); @1 is the
; ident_t instance pointing at @0 that the runtime calls visible here pass.
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8

; IR for `merge` in the C listing: %a is spilled to %a.addr and two fork calls
; follow each other with no instruction in between, both receiving the address
; of that slot. The expected output recorded further down in this file has a
; single fork call to @merge..omp_par for this function.
define dso_local void @merge(i32 %a) local_unnamed_addr  {
entry:
  %a.addr = alloca i32, align 4
  store i32 %a, ptr %a.addr, align 4
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined., ptr nonnull %a.addr)
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..1, ptr nonnull %a.addr)
  ret void
}

; Outlined body of the first parallel region in @merge: loads the i32 behind
; %a and passes it to @use. Neither thread-id pointer is read (readnone).
define internal void @.omp_outlined.(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; External function taking one i32; only declared here, so its body is unknown
; to the optimizer.
declare dso_local void @use(i32) local_unnamed_addr

; OpenMP runtime entry point used for every parallel region in this file.
; The !callback metadata (!1) marks argument 2 as a function that the callee
; invokes, with the variadic arguments forwarded to it.
declare !callback !1 void @__kmpc_fork_call(ptr, i32, ptr, ...) local_unnamed_addr

; Outlined body of the second parallel region in @merge: loads the i32 behind
; %a and passes it to @use.
define internal void @.omp_outlined..1(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; IR for `unmergable_proc_bind` in the C listing: the first fork call is
; preceded by @__kmpc_push_proc_bind (the `proc_bind(close)` clause, passed
; here as i32 3), the second has no clause. The expected output recorded
; further down in this file keeps both fork calls.
define dso_local void @unmergable_proc_bind(i32 %a) local_unnamed_addr  {
entry:
  %a.addr = alloca i32, align 4
  ; Thread id needed as the second argument of the push call below.
  %0 = call i32 @__kmpc_global_thread_num(ptr nonnull @1)
  store i32 %a, ptr %a.addr, align 4
  call void @__kmpc_push_proc_bind(ptr nonnull @1, i32 %0, i32 3)
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..2, ptr nonnull %a.addr)
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..3, ptr nonnull %a.addr)
  ret void
}

; Outlined body of the first (proc_bind) region in @unmergable_proc_bind:
; loads the i32 behind %a and passes it to @use.
define internal void @.omp_outlined..2(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; Runtime query whose i32 result is used in this file as the thread-id
; argument of the __kmpc_push_* calls.
declare i32 @__kmpc_global_thread_num(ptr) local_unnamed_addr

; Runtime call emitted before a fork call for a `proc_bind` clause; arguments
; are (location, thread id, binding kind).
declare void @__kmpc_push_proc_bind(ptr, i32, i32) local_unnamed_addr

; Outlined body of the second region in @unmergable_proc_bind: loads the i32
; behind %a and passes it to @use.
define internal void @.omp_outlined..3(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; IR for `unmergable_num_threads` in the C listing: the first fork call is
; preceded by @__kmpc_push_num_threads with the incoming %a as the requested
; thread count (`num_threads(a)`). The expected output recorded further down
; in this file keeps both fork calls.
define dso_local void @unmergable_num_threads(i32 %a) local_unnamed_addr  {
entry:
  %a.addr = alloca i32, align 4
  ; Thread id needed as the second argument of the push call below.
  %0 = call i32 @__kmpc_global_thread_num(ptr nonnull @1)
  store i32 %a, ptr %a.addr, align 4
  call void @__kmpc_push_num_threads(ptr nonnull @1, i32 %0, i32 %a)
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..4, ptr nonnull %a.addr)
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..5, ptr nonnull %a.addr)
  ret void
}

; Outlined body of the first (num_threads) region in @unmergable_num_threads:
; loads the i32 behind %a and passes it to @use.
define internal void @.omp_outlined..4(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; Runtime call emitted before a fork call for a `num_threads` clause;
; arguments are (location, thread id, requested thread count).
declare void @__kmpc_push_num_threads(ptr, i32, i32) local_unnamed_addr

; Outlined body of the second region in @unmergable_num_threads: loads the
; i32 behind %a and passes it to @use.
define internal void @.omp_outlined..5(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; IR for `unmergable_seq_call` in the C listing: the two fork calls are
; separated by a call to the external, body-less @foo.
define dso_local void @unmergable_seq_call(i32 %a) local_unnamed_addr  {
entry:
  %a.addr = alloca i32, align 4
  store i32 %a, ptr %a.addr, align 4
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..6, ptr nonnull %a.addr)
  ; Sequential code between the regions: an opaque external call.
  call void (...) @foo()
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..7, ptr nonnull %a.addr)
  ret void
}

; Outlined body of the first region in @unmergable_seq_call: loads the i32
; behind %a and passes it to @use.
define internal void @.omp_outlined..6(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; External function declared variadic with no fixed parameters (the C listing
; declares it as `void foo();`); it is called between parallel regions.
declare dso_local void @foo(...) local_unnamed_addr

; Outlined body of the second region in @unmergable_seq_call: loads the i32
; behind %a and passes it to @use.
define internal void @.omp_outlined..7(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; IR for `merge_seq` in the C listing: between the two fork calls the shared
; slot %a.addr is incremented in place (`a = a + 1`), and after the second
; region its value is reloaded and passed to @use.
define dso_local void @merge_seq(i32 %a) local_unnamed_addr  {
entry:
  %a.addr = alloca i32, align 4
  store i32 %a, ptr %a.addr, align 4
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..8, ptr nonnull %a.addr)
  ; Sequential code between the regions: load, add 1, store back.
  %0 = load i32, ptr %a.addr, align 4
  %add = add nsw i32 %0, 1
  store i32 %add, ptr %a.addr, align 4
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..9, ptr nonnull %a.addr)
  %1 = load i32, ptr %a.addr, align 4
  call void @use(i32 %1)
  ret void
}

; Outlined body of the first region in @merge_seq: loads the i32 behind %a
; and passes it to @use.
define internal void @.omp_outlined..8(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; Outlined body of the second region in @merge_seq: loads the i32 behind %a
; and passes it to @use.
define internal void @.omp_outlined..9(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; IR for `merge_seq_float` in the C listing: between the two fork calls the
; float in %f.addr is loaded, a constant is added (`f + 3.14f` in the listing)
; and the sum is stored through the caller-provided pointer %p.
define dso_local void @merge_seq_float(float %f, ptr nocapture %p) local_unnamed_addr  {
entry:
  %f.addr = alloca float, align 4
  store float %f, ptr %f.addr, align 4
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..10, ptr nonnull %f.addr)
  ; Sequential code between the regions: writes to %p, not to the shared slot.
  %0 = load float, ptr %f.addr, align 4
  %add = fadd float %0, 0x40091EB860000000
  store float %add, ptr %p, align 4
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..11, ptr nonnull %f.addr)
  ret void
}

; Outlined body of the first region in @merge_seq_float: loads the float
; behind %f, converts it to a signed i32 and passes that to @use.
define internal void @.omp_outlined..10(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %f)  {
entry:
  %0 = load float, ptr %f, align 4
  %conv = fptosi float %0 to i32
  call void @use(i32 %conv)
  ret void
}

; Outlined body of the second region in @merge_seq_float: loads the float
; behind %f, converts it to a signed i32 and passes that to @use.
define internal void @.omp_outlined..11(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %f)  {
entry:
  %0 = load float, ptr %f, align 4
  %conv = fptosi float %0 to i32
  call void @use(i32 %conv)
  ret void
}

; IR for `merge_seq_firstprivate` in the C listing: like @merge_seq, but the
; second region (`firstprivate(a)`) receives the incremented value itself,
; zero-extended to i64, instead of the address of %a.addr.
define dso_local void @merge_seq_firstprivate(i32 %a) local_unnamed_addr  {
entry:
  %a.addr = alloca i32, align 4
  store i32 %a, ptr %a.addr, align 4
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..12, ptr nonnull %a.addr)
  ; Sequential code between the regions: increment the slot and widen the new
  ; value for by-value passing.
  %0 = load i32, ptr %a.addr, align 4
  %add = add nsw i32 %0, 1
  store i32 %add, ptr %a.addr, align 4
  %a.casted.sroa.0.0.insert.ext = zext i32 %add to i64
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..13, i64 %a.casted.sroa.0.0.insert.ext)
  %1 = load i32, ptr %a.addr, align 4
  call void @use(i32 %1)
  ret void
}

; Outlined body of the first region in @merge_seq_firstprivate: loads the i32
; behind %a and passes it to @use.
define internal void @.omp_outlined..12(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; Outlined body of the second (firstprivate) region in
; @merge_seq_firstprivate: %a arrives by value as an i64, is truncated back
; to i32 and passed to @use. No memory is read.
define internal void @.omp_outlined..13(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., i64 %a)  {
entry:
  %a.addr.sroa.0.0.extract.trunc = trunc i64 %a to i32
  call void @use(i32 %a.addr.sroa.0.0.extract.trunc)
  ret void
}

; IR for `merge_seq_sink_lt` in the C listing: the code between the two fork
; calls is a complete lifetime.start/lifetime.end pair around a local %b that
; stores its own (truncated) address into itself. %b is not passed to either
; region.
define dso_local void @merge_seq_sink_lt(i32 %a) local_unnamed_addr  {
entry:
  %a.addr = alloca i32, align 4
  %b = alloca i32, align 4
  store i32 %a, ptr %a.addr, align 4
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..14, ptr nonnull %a.addr)
  ; Sequential code between the regions: `int b = (int)&b;` with its lifetime
  ; markers.
  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %b)
  %0 = ptrtoint ptr %b to i64
  %1 = trunc i64 %0 to i32
  store i32 %1, ptr %b, align 4
  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %b)
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..15, ptr nonnull %a.addr)
  ret void
}

; Outlined body of the first region in @merge_seq_sink_lt: loads the i32
; behind %a and passes it to @use.
define internal void @.omp_outlined..14(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; Lifetime marker intrinsics (size in bytes, pointer) used around the local
; %b in @merge_seq_sink_lt and @merge_seq_par_use.
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)

declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)

; Outlined body of the second region in @merge_seq_sink_lt: loads the i32
; behind %a and passes it to @use.
define internal void @.omp_outlined..15(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; IR for `merge_seq_par_use` in the C listing: the local %b is initialised to
; a + 1 between the two fork calls and then passed, together with %a.addr, to
; the second region. The lifetime of %b starts between the regions and ends
; only after the second fork call.
define dso_local void @merge_seq_par_use(i32 %a) local_unnamed_addr  {
entry:
  %a.addr = alloca i32, align 4
  %b = alloca i32, align 4
  store i32 %a, ptr %a.addr, align 4
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..16, ptr nonnull %a.addr)
  ; Sequential code between the regions: `int b = a + 1;`.
  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %b)
  %0 = load i32, ptr %a.addr, align 4
  %add = add nsw i32 %0, 1
  store i32 %add, ptr %b, align 4
  ; Two shared pointers are forwarded here, hence the i32 2.
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 2, ptr @.omp_outlined..17, ptr nonnull %a.addr, ptr nonnull %b)
  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %b)
  ret void
}

; Outlined body of the first region in @merge_seq_par_use: loads the i32
; behind %a and passes it to @use.
define internal void @.omp_outlined..16(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; Outlined body of the second region in @merge_seq_par_use: loads the i32
; behind %a and then the i32 behind %b, passing each to @use in that order.
define internal void @.omp_outlined..17(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a, ptr nocapture nonnull readonly align 4 dereferenceable(4) %b)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  %1 = load i32, ptr %b, align 4
  call void @use(i32 %1)
  ret void
}

; IR for `merge_cancellable_regions` in the C listing: both flags are spilled
; to their own stack slots and two fork calls follow back to back, the first
; receiving %cancel1.addr and the second %cancel2.addr.
define dso_local void @merge_cancellable_regions(i32 %cancel1, i32 %cancel2) local_unnamed_addr  {
entry:
  %cancel1.addr = alloca i32, align 4
  %cancel2.addr = alloca i32, align 4
  store i32 %cancel1, ptr %cancel1.addr, align 4
  store i32 %cancel2, ptr %cancel2.addr, align 4
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..18, ptr nonnull %cancel1.addr)
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..19, ptr nonnull %cancel2.addr)
  ret void
}

; Outlined body of the first region in @merge_cancellable_regions. If the
; i32 behind %cancel1 is non-zero it reads the thread id through
; %.global_tid. and calls @__kmpc_cancel (`#pragma omp cancel parallel` in the
; C listing); the call's result is ignored and both paths simply return.
define internal void @.omp_outlined..18(ptr noalias nocapture readonly %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %cancel1)  {
entry:
  %0 = load i32, ptr %cancel1, align 4
  %tobool.not = icmp eq i32 %0, 0
  br i1 %tobool.not, label %if.end, label %if.then

if.then:                                          ; preds = %entry
  %1 = load i32, ptr %.global_tid., align 4
  %2 = call i32 @__kmpc_cancel(ptr nonnull @1, i32 %1, i32 1)
  ret void

if.end:                                           ; preds = %entry
  ret void
}

; Runtime call emitted for `#pragma omp cancel`; arguments are (location,
; thread id, cancellation kind). Callers in this file discard the i32 result.
declare i32 @__kmpc_cancel(ptr, i32, i32) local_unnamed_addr

; Outlined body of the second region in @merge_cancellable_regions. If the
; i32 behind %cancel2 is non-zero it reads the thread id through
; %.global_tid. and calls @__kmpc_cancel; the result is ignored and both
; paths simply return.
define internal void @.omp_outlined..19(ptr noalias nocapture readonly %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %cancel2)  {
entry:
  %0 = load i32, ptr %cancel2, align 4
  %tobool.not = icmp eq i32 %0, 0
  br i1 %tobool.not, label %if.end, label %if.then

if.then:                                          ; preds = %entry
  %1 = load i32, ptr %.global_tid., align 4
  %2 = call i32 @__kmpc_cancel(ptr nonnull @1, i32 %1, i32 1)
  ret void

if.end:                                           ; preds = %entry
  ret void
}

; IR for `merge_cancellable_regions_seq` in the C listing: same two
; cancellable regions as @merge_cancellable_regions, but between the fork
; calls %cancel2.addr is overwritten with `!cancel1` (1 if the value in
; %cancel1.addr is zero, else 0).
define dso_local void @merge_cancellable_regions_seq(i32 %cancel1, i32 %cancel2) local_unnamed_addr  {
entry:
  %cancel1.addr = alloca i32, align 4
  %cancel2.addr = alloca i32, align 4
  store i32 %cancel1, ptr %cancel1.addr, align 4
  store i32 %cancel2, ptr %cancel2.addr, align 4
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..20, ptr nonnull %cancel1.addr)
  ; Sequential code between the regions: cancel2 = !cancel1.
  %0 = load i32, ptr %cancel1.addr, align 4
  %tobool.not = icmp eq i32 %0, 0
  %lnot.ext = zext i1 %tobool.not to i32
  store i32 %lnot.ext, ptr %cancel2.addr, align 4
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..21, ptr nonnull %cancel2.addr)
  ret void
}

; Outlined body of the first region in @merge_cancellable_regions_seq. If the
; i32 behind %cancel1 is non-zero it reads the thread id through
; %.global_tid. and calls @__kmpc_cancel; the result is ignored and both
; paths simply return.
define internal void @.omp_outlined..20(ptr noalias nocapture readonly %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %cancel1)  {
entry:
  %0 = load i32, ptr %cancel1, align 4
  %tobool.not = icmp eq i32 %0, 0
  br i1 %tobool.not, label %if.end, label %if.then

if.then:                                          ; preds = %entry
  %1 = load i32, ptr %.global_tid., align 4
  %2 = call i32 @__kmpc_cancel(ptr nonnull @1, i32 %1, i32 1)
  ret void

if.end:                                           ; preds = %entry
  ret void
}

; Outlined body of the second region in @merge_cancellable_regions_seq. If
; the i32 behind %cancel2 is non-zero it reads the thread id through
; %.global_tid. and calls @__kmpc_cancel; the result is ignored and both
; paths simply return.
define internal void @.omp_outlined..21(ptr noalias nocapture readonly %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %cancel2)  {
entry:
  %0 = load i32, ptr %cancel2, align 4
  %tobool.not = icmp eq i32 %0, 0
  br i1 %tobool.not, label %if.end, label %if.then

if.then:                                          ; preds = %entry
  %1 = load i32, ptr %.global_tid., align 4
  %2 = call i32 @__kmpc_cancel(ptr nonnull @1, i32 %1, i32 1)
  ret void

if.end:                                           ; preds = %entry
  ret void
}

; IR for `merge_3` in the C listing: three fork calls in a row with no
; instruction between them, all receiving the address of %a.addr.
define dso_local void @merge_3(i32 %a) local_unnamed_addr  {
entry:
  %a.addr = alloca i32, align 4
  store i32 %a, ptr %a.addr, align 4
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..22, ptr nonnull %a.addr)
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..23, ptr nonnull %a.addr)
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..24, ptr nonnull %a.addr)
  ret void
}

; Outlined body of the first region in @merge_3: loads the i32 behind %a and
; passes it to @use.
define internal void @.omp_outlined..22(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; Outlined body of the second region in @merge_3: loads the i32 behind %a and
; passes it to @use.
define internal void @.omp_outlined..23(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; Outlined body of the third region in @merge_3: loads the i32 behind %a and
; passes it to @use.
define internal void @.omp_outlined..24(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; IR for `merge_3_seq` in the C listing: three regions with arithmetic on `b`
; between them. The incoming %b is never read; the running value of `b` lives
; only in SSA values (%add, then %add1), computed from reloads of %a.addr
; after the first and second regions, and is passed to @use after the third.
define dso_local void @merge_3_seq(i32 %a, i32 %b) local_unnamed_addr  {
entry:
  %a.addr = alloca i32, align 4
  store i32 %a, ptr %a.addr, align 4
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..25, ptr nonnull %a.addr)
  ; b = a + 1
  %0 = load i32, ptr %a.addr, align 4
  %add = add nsw i32 %0, 1
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..26, ptr nonnull %a.addr)
  ; b = b + a
  %1 = load i32, ptr %a.addr, align 4
  %add1 = add nsw i32 %add, %1
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..27, ptr nonnull %a.addr)
  call void @use(i32 %add1)
  ret void
}

; Outlined body of the first region in @merge_3_seq: loads the i32 behind %a
; and passes it to @use.
define internal void @.omp_outlined..25(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; Outlined body of the second region in @merge_3_seq: loads the i32 behind %a
; and passes it to @use.
define internal void @.omp_outlined..26(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; Outlined body of the third region in @merge_3_seq: loads the i32 behind %a
; and passes it to @use.
define internal void @.omp_outlined..27(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; IR for `unmergable_3_seq_call` in the C listing: three fork calls, each
; adjacent pair separated by a call to the external, body-less @foo.
define dso_local void @unmergable_3_seq_call(i32 %a) local_unnamed_addr  {
entry:
  %a.addr = alloca i32, align 4
  store i32 %a, ptr %a.addr, align 4
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..28, ptr nonnull %a.addr)
  call void (...) @foo()
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..29, ptr nonnull %a.addr)
  call void (...) @foo()
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..30, ptr nonnull %a.addr)
  ret void
}

; Outlined body of the first region in @unmergable_3_seq_call: loads the i32
; behind %a and passes it to @use.
define internal void @.omp_outlined..28(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; Outlined body of the second region in @unmergable_3_seq_call: loads the i32
; behind %a and passes it to @use.
define internal void @.omp_outlined..29(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; Outlined body of the third region in @unmergable_3_seq_call: loads the i32
; behind %a and passes it to @use.
define internal void @.omp_outlined..30(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; IR for `unmergable_3_proc_bind` in the C listing: three fork calls where
; only the middle one carries `proc_bind(close)`, so @__kmpc_push_proc_bind
; sits between the first and second fork calls; the second and third are
; adjacent.
define dso_local void @unmergable_3_proc_bind(i32 %a) local_unnamed_addr  {
entry:
  %a.addr = alloca i32, align 4
  ; Thread id needed as the second argument of the push call below.
  %0 = call i32 @__kmpc_global_thread_num(ptr nonnull @1)
  store i32 %a, ptr %a.addr, align 4
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..31, ptr nonnull %a.addr)
  call void @__kmpc_push_proc_bind(ptr nonnull @1, i32 %0, i32 3)
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..32, ptr nonnull %a.addr)
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..33, ptr nonnull %a.addr)
  ret void
}

; Outlined body of the first region in @unmergable_3_proc_bind: loads the i32
; behind %a and passes it to @use.
define internal void @.omp_outlined..31(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; Outlined body of the second (proc_bind) region in @unmergable_3_proc_bind:
; loads the i32 behind %a and passes it to @use.
define internal void @.omp_outlined..32(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; Outlined body of the third region in @unmergable_3_proc_bind: loads the i32
; behind %a and passes it to @use.
define internal void @.omp_outlined..33(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; IR for `unmergable_3_num_threads` in the C listing: three fork calls where
; only the middle one carries `num_threads(a)`. The thread count is reloaded
; from %a.addr after the first region and pushed before the second fork call;
; the second and third fork calls are adjacent.
define dso_local void @unmergable_3_num_threads(i32 %a) local_unnamed_addr  {
entry:
  %a.addr = alloca i32, align 4
  ; Thread id needed as the second argument of the push call below.
  %0 = call i32 @__kmpc_global_thread_num(ptr nonnull @1)
  store i32 %a, ptr %a.addr, align 4
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..34, ptr nonnull %a.addr)
  %1 = load i32, ptr %a.addr, align 4
  call void @__kmpc_push_num_threads(ptr nonnull @1, i32 %0, i32 %1)
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..35, ptr nonnull %a.addr)
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..36, ptr nonnull %a.addr)
  ret void
}

; Outlined body of the first region in @unmergable_3_num_threads: loads the
; i32 behind %a and passes it to @use.
define internal void @.omp_outlined..34(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; Outlined body of the second (num_threads) region in
; @unmergable_3_num_threads: loads the i32 behind %a and passes it to @use.
define internal void @.omp_outlined..35(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; Outlined body of the third region in @unmergable_3_num_threads: loads the
; i32 behind %a and passes it to @use.
define internal void @.omp_outlined..36(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; IR for `merge_2_unmergable_1` in the C listing: the first two fork calls
; are adjacent, then a call to the external @foo separates them from the
; third fork call.
define dso_local void @merge_2_unmergable_1(i32 %a) local_unnamed_addr  {
entry:
  %a.addr = alloca i32, align 4
  store i32 %a, ptr %a.addr, align 4
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..37, ptr nonnull %a.addr)
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..38, ptr nonnull %a.addr)
  call void (...) @foo()
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..39, ptr nonnull %a.addr)
  ret void
}

; Outlined body of the first region in @merge_2_unmergable_1: loads the i32
; behind %a and passes it to @use.
define internal void @.omp_outlined..37(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; Outlined body of the second region in @merge_2_unmergable_1: loads the i32
; behind %a and passes it to @use.
define internal void @.omp_outlined..38(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}

; Outlined body of the third region in @merge_2_unmergable_1 (the one after
; the @foo call): loads the i32 behind %a and passes it to @use.
define internal void @.omp_outlined..39(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
entry:
  %0 = load i32, ptr %a, align 4
  call void @use(i32 %0)
  ret void
}


; Module flags: !0 ("wchar_size" = 4) and !3 ("openmp" = 50).
!llvm.module.flags = !{!0, !3}

!0 = !{i32 1, !"wchar_size", i32 4}
; !1 and !2 form the !callback metadata attached to the @__kmpc_fork_call
; declaration: call operand 2 is the callback callee, the two -1 entries are
; callback parameters with no matching call operand, and the final `i1 true`
; states that the variadic arguments are passed through to the callback.
!1 = !{!2}
!2 = !{i64 2, i64 -1, i64 -1, i1 true}
; NOTE(review): "openmp" = 50 presumably encodes OpenMP version 5.0 - confirm.
!3 = !{i32 7, !"openmp", i32 50}
; CHECK-LABEL: define {{[^@]+}}@merge
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge..omp_par, ptr [[A_ADDR]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined.(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    call void @.omp_outlined..1(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@unmergable_proc_bind
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void @__kmpc_push_proc_bind(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..2, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..3, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@unmergable_num_threads
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void @__kmpc_push_num_threads(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..4, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..5, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@unmergable_seq_call
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..6, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void (...) @foo()
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..7, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..7
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge_seq
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_seq..omp_par, ptr [[A_ADDR]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge_seq..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..8(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK:       omp_region.end:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split:
; CHECK-NEXT:    call void @.omp_outlined..9(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK:       omp_region.body:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; CHECK:       seq.par.merged:
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
; CHECK-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK:       omp.par.merged.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK:       omp_region.body.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    br label [[OMP_REGION_END]]
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..8
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..9
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge_seq_float
; CHECK-SAME: (float [[F:%.*]], ptr nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
; CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_float..omp_par, ptr [[F_ADDR]], ptr [[P]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[F_ADDR:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..10(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK:       omp_region.end:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split:
; CHECK-NEXT:    call void @.omp_outlined..11(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK:       omp_region.body:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; CHECK:       seq.par.merged:
; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F_ADDR]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP3]], 0x40091EB860000000
; CHECK-NEXT:    store float [[ADD]], ptr [[P]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK:       omp.par.merged.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK:       omp_region.body.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    br label [[OMP_REGION_END]]
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..10
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F]], align 4
; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
; CHECK-NEXT:    call void @use(i32 [[CONV]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..11
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F]], align 4
; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
; CHECK-NEXT:    call void @use(i32 [[CONV]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_firstprivate..omp_par, ptr [[A_ADDR]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..12(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK:       omp_region.end:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split:
; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
; CHECK-NEXT:    call void @.omp_outlined..13(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK:       omp_region.body:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; CHECK:       seq.par.merged:
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
; CHECK-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64
; CHECK-NEXT:    store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK:       omp.par.merged.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK:       omp_region.body.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    br label [[OMP_REGION_END]]
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..12
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..13
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32
; CHECK-NEXT:    call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_seq_sink_lt..omp_par, ptr [[A_ADDR]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..14(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK:       omp_region.end:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split:
; CHECK-NEXT:    call void @.omp_outlined..15(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK:       omp_region.body:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; CHECK:       seq.par.merged:
; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull [[B]])
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
; CHECK-NEXT:    store i32 [[TMP5]], ptr [[B]], align 4
; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[B]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK:       omp.par.merged.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK:       omp_region.body.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    br label [[OMP_REGION_END]]
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..14
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..15
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 -1, ptr [[B]])
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_par_use..omp_par, ptr [[A_ADDR]], ptr [[B]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[B]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..16(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK:       omp_region.end:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split:
; CHECK-NEXT:    call void @.omp_outlined..17(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK:       omp_region.body:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; CHECK:       seq.par.merged:
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], 1
; CHECK-NEXT:    store i32 [[ADD]], ptr [[B]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK:       omp.par.merged.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK:       omp_region.body.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    br label [[OMP_REGION_END]]
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..16
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..17
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP1]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions
; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4
; CHECK-NEXT:    store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_cancellable_regions..omp_par, ptr [[CANCEL1_ADDR]], ptr [[CANCEL2_ADDR]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[CANCEL1_ADDR:%.*]], ptr [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..18(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    call void @.omp_outlined..19(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..18
; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL1]], align 4
; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
; CHECK-NEXT:    ret void
; CHECK:       if.end:
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..19
; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL2]], align 4
; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
; CHECK-NEXT:    ret void
; CHECK:       if.end:
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq
; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4
; CHECK-NEXT:    store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_cancellable_regions_seq..omp_par, ptr [[CANCEL1_ADDR]], ptr [[CANCEL2_ADDR]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[CANCEL1_ADDR:%.*]], ptr [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..20(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK:       omp_region.end:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split:
; CHECK-NEXT:    call void @.omp_outlined..21(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK:       omp_region.body:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; CHECK:       seq.par.merged:
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[CANCEL1_ADDR]], align 4
; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0
; CHECK-NEXT:    [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32
; CHECK-NEXT:    store i32 [[LNOT_EXT]], ptr [[CANCEL2_ADDR]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK:       omp.par.merged.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK:       omp_region.body.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    br label [[OMP_REGION_END]]
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..20
; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL1]], align 4
; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
; CHECK-NEXT:    ret void
; CHECK:       if.end:
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..21
; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL2]], align 4
; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
; CHECK-NEXT:    ret void
; CHECK:       if.end:
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge_3
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_3..omp_par, ptr [[A_ADDR]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge_3..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..22(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    call void @.omp_outlined..23(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    call void @.omp_outlined..24(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..22
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..23
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..24
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge_3_seq
; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @merge_3_seq..omp_par, ptr [[A_ADDR]], ptr [[ADD_SEQ_OUTPUT_ALLOC]], ptr [[ADD1_SEQ_OUTPUT_ALLOC]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
; CHECK-NEXT:    call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[ADD_SEQ_OUTPUT_ALLOC:%.*]], ptr [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..25(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK:       omp_region.end:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split:
; CHECK-NEXT:    call void @.omp_outlined..26(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
; CHECK:       omp_region.end4:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split.split.split:
; CHECK-NEXT:    call void @.omp_outlined..27(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK:       omp_region.body5:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED2:%.*]]
; CHECK:       seq.par.merged2:
; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[ADD_SEQ_OUTPUT_ALLOC]], align 4
; CHECK-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP5]]
; CHECK-NEXT:    store i32 [[ADD1]], ptr [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY5_SPLIT:%.*]]
; CHECK:       omp_region.body5.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
; CHECK-NEXT:    br label [[OMP_REGION_END4]]
; CHECK:       omp_region.body:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; CHECK:       seq.par.merged:
; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
; CHECK-NEXT:    store i32 [[ADD]], ptr [[ADD_SEQ_OUTPUT_ALLOC]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK:       omp.par.merged.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK:       omp_region.body.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    br label [[OMP_REGION_END]]
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..25
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..26
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..27
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@unmergable_3_seq_call
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..28, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void (...) @foo()
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..29, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void (...) @foo()
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..30, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..28
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..29
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..30
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@unmergable_3_proc_bind
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..31, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void @__kmpc_push_proc_bind(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..32, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..33, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..31
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..32
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..33
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@unmergable_3_num_threads
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..34, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void @__kmpc_push_num_threads(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..35, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..36, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..34
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..35
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..36
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_2_unmergable_1..omp_par, ptr [[A_ADDR]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    call void (...) @foo()
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..39, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..37(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    call void @.omp_outlined..38(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..37
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..38
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..39
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge..omp_par, ptr [[A_ADDR]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined.(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    call void @.omp_outlined..1(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @.omp_outlined..1: same shape as @.omp_outlined. -- one i32
; load from the third argument followed by a call to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @unmergable_proc_bind: the two parallel regions are NOT
; merged. A __kmpc_push_proc_bind call precedes the first region, and each
; region keeps its own __kmpc_fork_call (@.omp_outlined..2 and
; @.omp_outlined..3), both capturing A_ADDR.
; The i32 3 passed to __kmpc_push_proc_bind is presumably the runtime encoding
; of proc_bind(close) from the C source at the top of the file -- TODO confirm.
; NOTE(review): the only RUN line visible at the top of this file selects a
; different FileCheck prefix than the one used on these lines; confirm that
; these assertions are still exercised.
; CHECK-LABEL: define {{[^@]+}}@unmergable_proc_bind
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void @__kmpc_push_proc_bind(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..2, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..3, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    ret void
; Expected IR for @.omp_outlined..2 (first region forked by
; @unmergable_proc_bind): load the captured i32 and pass it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @.omp_outlined..3 (second region forked by
; @unmergable_proc_bind): load the captured i32 and pass it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @unmergable_num_threads: the two parallel regions are NOT
; merged. __kmpc_push_num_threads is called with the incoming argument A before
; the first region, and two separate __kmpc_fork_call invocations remain
; (@.omp_outlined..4 and @.omp_outlined..5).
; CHECK-LABEL: define {{[^@]+}}@unmergable_num_threads
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void @__kmpc_push_num_threads(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..4, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..5, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    ret void
; Expected IR for @.omp_outlined..4 (first region forked by
; @unmergable_num_threads): load the captured i32 and pass it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @.omp_outlined..5 (second region forked by
; @unmergable_num_threads): load the captured i32 and pass it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @unmergable_seq_call: the call to @foo stays between the two
; parallel regions, and each region keeps its own __kmpc_fork_call
; (@.omp_outlined..6 before the call, @.omp_outlined..7 after it).
; CHECK-LABEL: define {{[^@]+}}@unmergable_seq_call
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..6, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void (...) @foo()
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..7, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    ret void
; Expected IR for @.omp_outlined..6 (region forked before the @foo call in
; @unmergable_seq_call): load the captured i32 and pass it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @.omp_outlined..7 (region forked after the @foo call in
; @unmergable_seq_call): load the captured i32 and pass it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..7
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @merge_seq after merging: a single __kmpc_fork_call to
; @merge_seq..omp_par with one captured pointer (A_ADDR). Once the merged
; region has finished, the function reloads A_ADDR and passes the value to
; @use (the trailing use(a) in the C source at the top of the file).
; CHECK-LABEL: define {{[^@]+}}@merge_seq
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_seq..omp_par, ptr [[A_ADDR]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @merge_seq..omp_par, the outlined body of the merged region:
;   1. call @.omp_outlined..8, then __kmpc_barrier;
;   2. __kmpc_master selects whether this thread runs the sequential code
;      (load / add 1 / store on A_ADDR), which is closed by __kmpc_end_master;
;   3. a second __kmpc_barrier, then call @.omp_outlined..9.
; Blocks are matched in textual order, so omp_region.end is expected before
; omp_region.body.
; CHECK-LABEL: define {{[^@]+}}@merge_seq..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..8(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK:       omp_region.end:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split:
; CHECK-NEXT:    call void @.omp_outlined..9(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK:       omp_region.body:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; Sequential code reached only through omp_region.body, i.e. when the
; __kmpc_master result was non-zero.
; CHECK:       seq.par.merged:
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
; CHECK-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK:       omp.par.merged.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK:       omp_region.body.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    br label [[OMP_REGION_END]]
; Expected IR for @.omp_outlined..8 (called first inside
; @merge_seq..omp_par): load the captured i32 and pass it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..8
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @.omp_outlined..9 (called last inside
; @merge_seq..omp_par): load the captured i32 and pass it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..9
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @merge_seq_float after merging: a single __kmpc_fork_call to
; @merge_seq_float..omp_par with two captured pointers (F_ADDR and P). Nothing
; but the return follows the merged region.
; CHECK-LABEL: define {{[^@]+}}@merge_seq_float
; CHECK-SAME: (float [[F:%.*]], ptr nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
; CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_float..omp_par, ptr [[F_ADDR]], ptr [[P]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    ret void
; Expected IR for @merge_seq_float..omp_par, the outlined body of the merged
; region:
;   1. call @.omp_outlined..10, then __kmpc_barrier;
;   2. a __kmpc_master / __kmpc_end_master pair around the sequential code,
;      which loads the float at F_ADDR, adds a constant and stores to P;
;   3. a second __kmpc_barrier, then call @.omp_outlined..11.
; Blocks are matched in textual order, so omp_region.end is expected before
; omp_region.body.
; CHECK-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[F_ADDR:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..10(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK:       omp_region.end:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split:
; CHECK-NEXT:    call void @.omp_outlined..11(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK:       omp_region.body:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; Sequential code reached only through omp_region.body. The fadd constant is
; presumably the 3.14f literal from the C source at the top of the file.
; CHECK:       seq.par.merged:
; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F_ADDR]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP3]], 0x40091EB860000000
; CHECK-NEXT:    store float [[ADD]], ptr [[P]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK:       omp.par.merged.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK:       omp_region.body.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    br label [[OMP_REGION_END]]
; Expected IR for @.omp_outlined..10 (called first inside
; @merge_seq_float..omp_par): load the captured float, convert it to i32 with
; fptosi and pass the result to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..10
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F]], align 4
; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
; CHECK-NEXT:    call void @use(i32 [[CONV]])
; CHECK-NEXT:    ret void
; Expected IR for @.omp_outlined..11 (called last inside
; @merge_seq_float..omp_par): load the captured float, convert it to i32 with
; fptosi and pass the result to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..11
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F]], align 4
; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
; CHECK-NEXT:    call void @use(i32 [[CONV]])
; CHECK-NEXT:    ret void
; Expected IR for @merge_seq_firstprivate after merging: an extra i64 alloca
; (the *_SEQ_OUTPUT_ALLOC slot) is created in the caller and handed to the
; single __kmpc_fork_call together with A_ADDR. After the merged region the
; function reloads A_ADDR and passes the value to @use.
; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_firstprivate..omp_par, ptr [[A_ADDR]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @merge_seq_firstprivate..omp_par, the outlined body of the
; merged region:
;   1. call @.omp_outlined..12, then __kmpc_barrier;
;   2. a __kmpc_master / __kmpc_end_master pair around the sequential code,
;      which increments the i32 at A_ADDR and also stores the zero-extended
;      result into the *_SEQ_OUTPUT_ALLOC slot;
;   3. a second __kmpc_barrier, after which the slot is loaded and the i64 is
;      passed by value to @.omp_outlined..13.
; Blocks are matched in textual order, so omp_region.end is expected before
; omp_region.body.
; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..12(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK:       omp_region.end:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; The value written by the sequential code is read back from the slot here,
; after the barrier above.
; CHECK:       omp.par.merged.split.split:
; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
; CHECK-NEXT:    call void @.omp_outlined..13(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK:       omp_region.body:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; Sequential code reached only through omp_region.body.
; CHECK:       seq.par.merged:
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
; CHECK-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64
; CHECK-NEXT:    store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK:       omp.par.merged.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK:       omp_region.body.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    br label [[OMP_REGION_END]]
; Expected IR for @.omp_outlined..12 (called first inside
; @merge_seq_firstprivate..omp_par): load the captured i32 and pass it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..12
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @.omp_outlined..13: unlike its siblings, the third parameter
; is an i64 passed by value; it is truncated to i32 and passed to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..13
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32
; CHECK-NEXT:    call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]])
; CHECK-NEXT:    ret void
; Expected IR for @merge_seq_sink_lt after merging: a single __kmpc_fork_call
; to @merge_seq_sink_lt..omp_par with one captured pointer (A_ADDR). No alloca
; for the local `b` is expected in this function.
; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_seq_sink_lt..omp_par, ptr [[A_ADDR]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    ret void
; Expected IR for @merge_seq_sink_lt..omp_par, the outlined body of the merged
; region. The alloca for B is expected inside this outlined function, and its
; lifetime.start / lifetime.end markers stay inside the sequential code:
;   1. call @.omp_outlined..14, then __kmpc_barrier;
;   2. a __kmpc_master / __kmpc_end_master pair around the sequential code;
;   3. a second __kmpc_barrier, then call @.omp_outlined..15.
; Blocks are matched in textual order, so omp_region.end is expected before
; omp_region.body.
; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..14(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK:       omp_region.end:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split:
; CHECK-NEXT:    call void @.omp_outlined..15(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK:       omp_region.body:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; Sequential code reached only through omp_region.body: the address of B is
; converted to an integer, truncated to i32 and stored back into B.
; CHECK:       seq.par.merged:
; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull [[B]])
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
; CHECK-NEXT:    store i32 [[TMP5]], ptr [[B]], align 4
; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[B]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK:       omp.par.merged.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK:       omp_region.body.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    br label [[OMP_REGION_END]]
; Expected IR for @.omp_outlined..14 (called first inside
; @merge_seq_sink_lt..omp_par): load the captured i32 and pass it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..14
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @.omp_outlined..15 (called last inside
; @merge_seq_sink_lt..omp_par): load the captured i32 and pass it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..15
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @merge_seq_par_use after merging: the alloca for B stays in
; this function and is passed, together with A_ADDR, to the single
; __kmpc_fork_call. A lifetime.start with size -1 on B is expected right before
; the fork call, and the lifetime.end with size 4 after the merged region.
; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 -1, ptr [[B]])
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_par_use..omp_par, ptr [[A_ADDR]], ptr [[B]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[B]])
; CHECK-NEXT:    ret void
; Expected IR for @merge_seq_par_use..omp_par, the outlined body of the merged
; region:
;   1. call @.omp_outlined..16 with A_ADDR, then __kmpc_barrier;
;   2. a __kmpc_master / __kmpc_end_master pair around the sequential code,
;      which loads A_ADDR, adds 1 and stores the result to B;
;   3. a second __kmpc_barrier, then call @.omp_outlined..17 with both A_ADDR
;      and B.
; Blocks are matched in textual order, so omp_region.end is expected before
; omp_region.body.
; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..16(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK:       omp_region.end:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split:
; CHECK-NEXT:    call void @.omp_outlined..17(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK:       omp_region.body:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; Sequential code reached only through omp_region.body; its result in B is
; what the @.omp_outlined..17 call above receives as its last argument.
; CHECK:       seq.par.merged:
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], 1
; CHECK-NEXT:    store i32 [[ADD]], ptr [[B]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK:       omp.par.merged.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK:       omp_region.body.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    br label [[OMP_REGION_END]]
; Expected IR for @.omp_outlined..16 (called first inside
; @merge_seq_par_use..omp_par): load the captured i32 and pass it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..16
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @.omp_outlined..17: takes two captured i32 pointers (A and
; B); each is loaded and passed to @use in turn, A first.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..17
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP1]])
; CHECK-NEXT:    ret void
; Expected IR for @merge_cancellable_regions after merging: both arguments are
; spilled to allocas and a single __kmpc_fork_call to
; @merge_cancellable_regions..omp_par receives both addresses.
; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions
; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4
; CHECK-NEXT:    store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_cancellable_regions..omp_par, ptr [[CANCEL1_ADDR]], ptr [[CANCEL2_ADDR]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    ret void
; Expected IR for @merge_cancellable_regions..omp_par, the outlined body of the
; merged region: call @.omp_outlined..18 with CANCEL1_ADDR, one __kmpc_barrier,
; then call @.omp_outlined..19 with CANCEL2_ADDR. No __kmpc_master section is
; expected between the two calls.
; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[CANCEL1_ADDR:%.*]], ptr [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..18(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    call void @.omp_outlined..19(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..18
; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL1]], align 4
; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
; CHECK-NEXT:    ret void
; CHECK:       if.end:
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..19
; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL2]], align 4
; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
; CHECK-NEXT:    ret void
; CHECK:       if.end:
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq
; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4
; CHECK-NEXT:    store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_cancellable_regions_seq..omp_par, ptr [[CANCEL1_ADDR]], ptr [[CANCEL2_ADDR]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[CANCEL1_ADDR:%.*]], ptr [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..20(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK:       omp_region.end:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split:
; CHECK-NEXT:    call void @.omp_outlined..21(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK:       omp_region.body:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; CHECK:       seq.par.merged:
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[CANCEL1_ADDR]], align 4
; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0
; CHECK-NEXT:    [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32
; CHECK-NEXT:    store i32 [[LNOT_EXT]], ptr [[CANCEL2_ADDR]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK:       omp.par.merged.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK:       omp_region.body.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    br label [[OMP_REGION_END]]
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..20
; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL1]], align 4
; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
; CHECK-NEXT:    ret void
; CHECK:       if.end:
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..21
; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL2]], align 4
; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
; CHECK-NEXT:    ret void
; CHECK:       if.end:
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge_3
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_3..omp_par, ptr [[A_ADDR]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge_3..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..22(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    call void @.omp_outlined..23(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    call void @.omp_outlined..24(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..22
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..23
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..24
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge_3_seq
; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @merge_3_seq..omp_par, ptr [[A_ADDR]], ptr [[ADD_SEQ_OUTPUT_ALLOC]], ptr [[ADD1_SEQ_OUTPUT_ALLOC]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
; CHECK-NEXT:    call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[ADD_SEQ_OUTPUT_ALLOC:%.*]], ptr [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..25(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK:       omp_region.end:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split:
; CHECK-NEXT:    call void @.omp_outlined..26(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
; CHECK:       omp_region.end4:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split.split.split:
; CHECK-NEXT:    call void @.omp_outlined..27(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK:       omp_region.body5:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED2:%.*]]
; CHECK:       seq.par.merged2:
; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[ADD_SEQ_OUTPUT_ALLOC]], align 4
; CHECK-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP5]]
; CHECK-NEXT:    store i32 [[ADD1]], ptr [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY5_SPLIT:%.*]]
; CHECK:       omp_region.body5.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
; CHECK-NEXT:    br label [[OMP_REGION_END4]]
; CHECK:       omp_region.body:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; CHECK:       seq.par.merged:
; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
; CHECK-NEXT:    store i32 [[ADD]], ptr [[ADD_SEQ_OUTPUT_ALLOC]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK:       omp.par.merged.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK:       omp_region.body.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    br label [[OMP_REGION_END]]
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..25
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..26
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..27
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@unmergable_3_seq_call
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..28, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void (...) @foo()
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..29, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void (...) @foo()
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..30, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..28
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..29
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..30
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@unmergable_3_proc_bind
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..31, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void @__kmpc_push_proc_bind(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..32, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..33, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..31
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..32
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..33
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@unmergable_3_num_threads
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..34, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void @__kmpc_push_num_threads(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..35, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..36, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..34
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..35
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..36
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_2_unmergable_1..omp_par, ptr [[A_ADDR]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    call void (...) @foo()
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..39, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..37(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    call void @.omp_outlined..38(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..37
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..38
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..39
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@merge
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge..omp_par, ptr [[A_ADDR]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    ret void
; Expected IR for @merge..omp_par, the callee of the single fork call in
; @merge. It copies the incoming thread id into a local slot, then in
; omp.par.merged calls the two original outlined bodies (@.omp_outlined. and
; @.omp_outlined..1) directly, separated by a __kmpc_barrier call. The
; remaining blocks only branch through to the exit stub.
; CHECK-LABEL: define {{[^@]+}}@merge..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined.(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    call void @.omp_outlined..1(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; Expected body of @.omp_outlined., the first region body called from
; @merge..omp_par: load the i32 behind the third argument and pass it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..1, the second region body called from
; @merge..omp_par: load the i32 behind the third argument and pass it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @unmergable_proc_bind (first region carries a proc_bind
; clause in the C source at the top of the file). The regions must stay
; separate: a __kmpc_push_proc_bind call with the constant 3 is followed by
; two distinct fork calls, one per outlined body (..2 and ..3).
; CHECK-LABEL: define {{[^@]+}}@unmergable_proc_bind
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void @__kmpc_push_proc_bind(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..2, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..3, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..2, the callee of the first fork call in
; @unmergable_proc_bind: load the i32 argument slot and pass it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..3, the callee of the second fork call in
; @unmergable_proc_bind: load the i32 argument slot and pass it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @unmergable_num_threads (first region carries a
; num_threads(a) clause in the C source at the top of the file). The regions
; must stay separate: a __kmpc_push_num_threads call taking %a is followed by
; two distinct fork calls, one per outlined body (..4 and ..5).
; CHECK-LABEL: define {{[^@]+}}@unmergable_num_threads
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void @__kmpc_push_num_threads(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..4, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..5, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..4, the callee of the first fork call in
; @unmergable_num_threads: load the i32 argument slot and pass it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..5, the callee of the second fork call in
; @unmergable_num_threads: load the i32 argument slot and pass it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @unmergable_seq_call (a call to foo() sits between the two
; parallel regions in the C source at the top of the file). The regions must
; stay separate: two distinct fork calls (..6 and ..7) with the call to @foo
; between them.
; CHECK-LABEL: define {{[^@]+}}@unmergable_seq_call
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..6, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void (...) @foo()
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..7, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..6, the callee of the first fork call in
; @unmergable_seq_call: load the i32 argument slot and pass it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..7, the callee of the second fork call in
; @unmergable_seq_call: load the i32 argument slot and pass it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..7
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @merge_seq (two parallel regions with `a = a + 1` between
; them and a trailing use(a) in the C source at the top of the file). The
; caller issues a single fork call to @merge_seq..omp_par with the %a slot
; as the only shared argument; the increment itself is not present in the
; caller. After the join, the slot is reloaded and passed to @use.
; CHECK-LABEL: define {{[^@]+}}@merge_seq
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_seq..omp_par, ptr [[A_ADDR]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @merge_seq..omp_par, the callee of the single fork call in
; @merge_seq. Order of events asserted below:
;   1. omp.par.merged calls the first region body (@.omp_outlined..8),
;      then a barrier, then asks __kmpc_master whether to run the guarded
;      block.
;   2. On a non-zero result, seq.par.merged performs the load / add 1 / store
;      on the shared slot and omp_region.body.split calls __kmpc_end_master.
;   3. omp_region.end (reached from both paths) issues a second barrier.
;   4. omp.par.merged.split.split calls the second region body
;      (@.omp_outlined..9).
; CHECK-LABEL: define {{[^@]+}}@merge_seq..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..8(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK:       omp_region.end:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split:
; CHECK-NEXT:    call void @.omp_outlined..9(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK:       omp_region.body:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; CHECK:       seq.par.merged:
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
; CHECK-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK:       omp.par.merged.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK:       omp_region.body.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    br label [[OMP_REGION_END]]
; Expected body of @.omp_outlined..8, the first region body called from
; @merge_seq..omp_par: load the i32 argument slot and pass it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..8
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..9, the second region body called from
; @merge_seq..omp_par: load the i32 argument slot and pass it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..9
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @merge_seq_float (two parallel regions with a store through
; %p between them in the C source at the top of the file). The caller issues
; a single fork call to @merge_seq_float..omp_par with two shared arguments:
; the stack slot holding %f and the pointer %p. Nothing but fall-through
; branches and the return follows the fork.
; CHECK-LABEL: define {{[^@]+}}@merge_seq_float
; CHECK-SAME: (float [[F:%.*]], ptr nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
; CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_float..omp_par, ptr [[F_ADDR]], ptr [[P]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    ret void
; Expected IR for @merge_seq_float..omp_par, the callee of the single fork
; call in @merge_seq_float. Same skeleton as the integer variant: first
; region body (..10), barrier, __kmpc_master guard, second barrier in
; omp_region.end, second region body (..11). The guarded block
; (seq.par.merged) loads the float from the shared slot, adds a float
; constant (presumably the 3.14f addend from the C source -- verify), and
; stores the sum through %p.
; CHECK-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[F_ADDR:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..10(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK:       omp_region.end:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split:
; CHECK-NEXT:    call void @.omp_outlined..11(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK:       omp_region.body:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; CHECK:       seq.par.merged:
; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F_ADDR]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP3]], 0x40091EB860000000
; CHECK-NEXT:    store float [[ADD]], ptr [[P]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK:       omp.par.merged.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK:       omp_region.body.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    br label [[OMP_REGION_END]]
; Expected body of @.omp_outlined..10, the first region body called from
; @merge_seq_float..omp_par: load the float argument slot, convert it to i32
; with fptosi, and pass the result to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..10
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F]], align 4
; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
; CHECK-NEXT:    call void @use(i32 [[CONV]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..11, the second region body called from
; @merge_seq_float..omp_par: load the float argument slot, convert it to i32
; with fptosi, and pass the result to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..11
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F]], align 4
; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
; CHECK-NEXT:    call void @use(i32 [[CONV]])
; CHECK-NEXT:    ret void
; Expected IR for @merge_seq_firstprivate (second region is firstprivate(a)
; in the C source at the top of the file). Besides the usual %a slot, the
; caller allocates an extra i64 slot (the *_SEQ_OUTPUT_ALLOC capture) and
; passes both to the single fork call targeting
; @merge_seq_firstprivate..omp_par. After the join, the %a slot is reloaded
; and passed to @use.
; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_firstprivate..omp_par, ptr [[A_ADDR]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @merge_seq_firstprivate..omp_par, the callee of the single
; fork call in @merge_seq_firstprivate. Skeleton: first region body (..12),
; barrier, __kmpc_master guard, second barrier in omp_region.end, second
; region body (..13). The guarded block increments the shared i32 slot and
; additionally stores the zero-extended sum into the i64 slot received as
; the fourth parameter; after the second barrier that i64 is reloaded and
; passed by value to @.omp_outlined..13.
; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..12(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK:       omp_region.end:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split:
; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
; CHECK-NEXT:    call void @.omp_outlined..13(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK:       omp_region.body:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; CHECK:       seq.par.merged:
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
; CHECK-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64
; CHECK-NEXT:    store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK:       omp.par.merged.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK:       omp_region.body.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    br label [[OMP_REGION_END]]
; Expected body of @.omp_outlined..12, the first region body called from
; @merge_seq_firstprivate..omp_par: load the i32 argument slot and pass it
; to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..12
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..13, the second region body called from
; @merge_seq_firstprivate..omp_par. Unlike its siblings it receives the
; value as an i64 by value, truncates it to i32, and passes that to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..13
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32
; CHECK-NEXT:    call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]])
; CHECK-NEXT:    ret void
; Expected IR for @merge_seq_sink_lt (a scoped local `b` lives between the
; two parallel regions in the C source at the top of the file). The caller
; keeps only the %a slot and issues a single fork call to
; @merge_seq_sink_lt..omp_par with that slot as the sole shared argument; no
; alloca or lifetime marker for `b` is expected in the caller.
; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_seq_sink_lt..omp_par, ptr [[A_ADDR]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    ret void
; Expected IR for @merge_seq_sink_lt..omp_par, the callee of the single fork
; call in @merge_seq_sink_lt. The alloca for `b` appears in this function's
; entry block, and its lifetime.start / ptrtoint / trunc / store /
; lifetime.end sequence sits entirely inside the __kmpc_master-guarded block
; (seq.par.merged). The surrounding skeleton matches the other merged
; functions: first region body (..14), barrier, master guard, second barrier
; in omp_region.end, second region body (..15).
; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..14(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK:       omp_region.end:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split:
; CHECK-NEXT:    call void @.omp_outlined..15(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK:       omp_region.body:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; CHECK:       seq.par.merged:
; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull [[B]])
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
; CHECK-NEXT:    store i32 [[TMP5]], ptr [[B]], align 4
; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[B]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK:       omp.par.merged.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK:       omp_region.body.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    br label [[OMP_REGION_END]]
; Expected body of @.omp_outlined..14, the first region body called from
; @merge_seq_sink_lt..omp_par: load the i32 argument slot and pass it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..14
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..15, the second region body called from
; @merge_seq_sink_lt..omp_par: load the i32 argument slot and pass it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..15
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @merge_seq_par_use (`b = a + 1` is computed between the
; regions and read by the second region in the C source at the top of the
; file). Here the alloca for `b` stays in the caller and is passed, together
; with the %a slot, to the single fork call targeting
; @merge_seq_par_use..omp_par. A lifetime.start on `b` precedes the fork and
; a lifetime.end follows the join.
; NOTE(review): the two lifetime markers carry different size operands
; (-1 before the fork, 4 after the join) -- this is what the assertions
; record; whether that asymmetry is intended is not visible from here.
; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 -1, ptr [[B]])
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_par_use..omp_par, ptr [[A_ADDR]], ptr [[B]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[B]])
; CHECK-NEXT:    ret void
; Expected IR for @merge_seq_par_use..omp_par, the callee of the single fork
; call in @merge_seq_par_use. Skeleton: first region body (..16), barrier,
; __kmpc_master guard, second barrier in omp_region.end, second region body
; (..17). The guarded block loads the shared %a slot, adds 1, and stores the
; result into the `b` slot received as the fourth parameter; the second
; region body is then called with both slots.
; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..16(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK:       omp_region.end:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split:
; CHECK-NEXT:    call void @.omp_outlined..17(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK:       omp_region.body:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; CHECK:       seq.par.merged:
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], 1
; CHECK-NEXT:    store i32 [[ADD]], ptr [[B]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK:       omp.par.merged.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK:       omp_region.body.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    br label [[OMP_REGION_END]]
; Expected body of @.omp_outlined..16, the first region body called from
; @merge_seq_par_use..omp_par: load the i32 argument slot and pass it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..16
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..17, the second region body called from
; @merge_seq_par_use..omp_par. It takes two read-only i32 slots and calls
; @use twice: first with the value loaded from the third argument, then with
; the value loaded from the fourth.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..17
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP1]])
; CHECK-NEXT:    ret void
; Expected IR for @merge_cancellable_regions. The caller spills both i32
; parameters to stack slots, queries the global thread number, and issues a
; single fork call to @merge_cancellable_regions..omp_par with the two slots
; as shared arguments. Only fall-through branches and the return follow the
; fork.
; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions
; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4
; CHECK-NEXT:    store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_cancellable_regions..omp_par, ptr [[CANCEL1_ADDR]], ptr [[CANCEL2_ADDR]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    ret void
; Expected merged parallel body for merge_cancellable_regions - the incoming
; thread id is copied into a local alloca, then @.omp_outlined..18 and
; @.omp_outlined..19 are called directly with a __kmpc_barrier between them.
; The trailing blocks are plain branches leading to the exit stub.
; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[CANCEL1_ADDR:%.*]], ptr [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..18(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    call void @.omp_outlined..19(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; Expected body of @.omp_outlined..18 - loads the captured i32 and, when it is
; non-zero, reads the global thread id and calls __kmpc_cancel with a final
; argument of 1. Both branches return void. The global-tid pointer is expected
; to be readonly (not readnone) because it is loaded in if.then.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..18
; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL1]], align 4
; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
; CHECK-NEXT:    ret void
; CHECK:       if.end:
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..19 - same shape as @.omp_outlined..18 but
; keyed on the CANCEL2 pointer. A non-zero loaded value leads to a
; __kmpc_cancel call (final argument 1); both branches return void.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..19
; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL2]], align 4
; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
; CHECK-NEXT:    ret void
; CHECK:       if.end:
; CHECK-NEXT:    ret void
; Expected host-side IR for @merge_cancellable_regions_seq - both i32 arguments
; are stored to allocas and one __kmpc_fork_call (2 captured pointers) launches
; @merge_cancellable_regions_seq..omp_par. Nothing but branches and the return
; follows the fork.
; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq
; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4
; CHECK-NEXT:    store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_cancellable_regions_seq..omp_par, ptr [[CANCEL1_ADDR]], ptr [[CANCEL2_ADDR]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    ret void
; Expected merged parallel body for merge_cancellable_regions_seq. Order of
; events in the expected IR - call @.omp_outlined..20, barrier, then a
; __kmpc_master test. The thread for which the test is non-zero runs
; seq.par.merged and calls __kmpc_end_master; every thread then reaches the
; barrier in omp_region.end before @.omp_outlined..21 is called.
; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[CANCEL1_ADDR:%.*]], ptr [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..20(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK:       omp_region.end:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split:
; CHECK-NEXT:    call void @.omp_outlined..21(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; Master-only sequential code - stores zext(cancel1 == 0) into the cancel2 slot.
; CHECK:       omp_region.body:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; CHECK:       seq.par.merged:
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[CANCEL1_ADDR]], align 4
; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0
; CHECK-NEXT:    [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32
; CHECK-NEXT:    store i32 [[LNOT_EXT]], ptr [[CANCEL2_ADDR]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK:       omp.par.merged.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK:       omp_region.body.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    br label [[OMP_REGION_END]]
; Expected body of @.omp_outlined..20 - loads the captured i32 (CANCEL1) and,
; when it is non-zero, reads the global thread id and calls __kmpc_cancel with
; a final argument of 1. Both branches return void.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..20
; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL1]], align 4
; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
; CHECK-NEXT:    ret void
; CHECK:       if.end:
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..21 - same shape as @.omp_outlined..20 but
; keyed on the CANCEL2 pointer. A non-zero loaded value leads to a
; __kmpc_cancel call (final argument 1); both branches return void.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..21
; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL2]], align 4
; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
; CHECK-NEXT:    ret void
; CHECK:       if.end:
; CHECK-NEXT:    ret void
; Expected host-side IR for @merge_3 - the argument is stored to an alloca and
; a single __kmpc_fork_call (1 captured pointer) launches @merge_3..omp_par.
; Only branches and the return follow the fork.
; CHECK-LABEL: define {{[^@]+}}@merge_3
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_3..omp_par, ptr [[A_ADDR]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    ret void
; Expected merged parallel body for merge_3 - three direct calls
; (@.omp_outlined..22, ..23, ..24) on the same captured pointer, with a
; __kmpc_barrier after the first and after the second call.
; CHECK-LABEL: define {{[^@]+}}@merge_3..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..22(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    call void @.omp_outlined..23(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    call void @.omp_outlined..24(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; Expected body of @.omp_outlined..22 - loads the captured i32 and passes it
; to @use. Neither thread-id pointer is read.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..22
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..23 - loads the captured i32 and passes it
; to @use. Neither thread-id pointer is read.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..23
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..24 - loads the captured i32 and passes it
; to @use. Neither thread-id pointer is read.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..24
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected host-side IR for @merge_3_seq - besides the alloca for A, two extra
; allocas (ADD_SEQ_OUTPUT_ALLOC, ADD1_SEQ_OUTPUT_ALLOC) are passed to
; @merge_3_seq..omp_par through one __kmpc_fork_call (3 captured pointers).
; After the fork, the value in ADD1_SEQ_OUTPUT_ALLOC is loaded and passed to
; @use. The incoming B argument is not referenced by any line in this group.
; CHECK-LABEL: define {{[^@]+}}@merge_3_seq
; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @merge_3_seq..omp_par, ptr [[A_ADDR]], ptr [[ADD_SEQ_OUTPUT_ALLOC]], ptr [[ADD1_SEQ_OUTPUT_ALLOC]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
; CHECK-NEXT:    call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]])
; CHECK-NEXT:    ret void
; Expected merged parallel body for merge_3_seq - three outlined calls
; (@.omp_outlined..25, ..26, ..27) with a master-guarded sequential block
; between each pair. Each sequential block is preceded by a barrier, entered
; via a __kmpc_master test, closed by __kmpc_end_master, and followed by a
; second barrier in the matching omp_region.end block.
; The second sequential block (omp_region.body5) is listed before the first
; (omp_region.body) in the expected output.
; CHECK-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[ADD_SEQ_OUTPUT_ALLOC:%.*]], ptr [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..25(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK:       omp_region.end:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split:
; CHECK-NEXT:    call void @.omp_outlined..26(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
; CHECK:       omp_region.end4:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split.split.split:
; CHECK-NEXT:    call void @.omp_outlined..27(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; Second sequential block - adds the value stored by the first block
; (ADD_SEQ_OUTPUT_ALLOC) to A and stores the sum to ADD1_SEQ_OUTPUT_ALLOC.
; CHECK:       omp_region.body5:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED2:%.*]]
; CHECK:       seq.par.merged2:
; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[ADD_SEQ_OUTPUT_ALLOC]], align 4
; CHECK-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP5]]
; CHECK-NEXT:    store i32 [[ADD1]], ptr [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY5_SPLIT:%.*]]
; CHECK:       omp_region.body5.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
; CHECK-NEXT:    br label [[OMP_REGION_END4]]
; First sequential block - stores A + 1 to ADD_SEQ_OUTPUT_ALLOC.
; CHECK:       omp_region.body:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; CHECK:       seq.par.merged:
; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
; CHECK-NEXT:    store i32 [[ADD]], ptr [[ADD_SEQ_OUTPUT_ALLOC]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK:       omp.par.merged.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK:       omp_region.body.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    br label [[OMP_REGION_END]]
; Expected body of @.omp_outlined..25 - loads the captured i32 and passes it
; to @use. Neither thread-id pointer is read.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..25
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..26 - loads the captured i32 and passes it
; to @use. Neither thread-id pointer is read.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..26
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..27 - loads the captured i32 and passes it
; to @use. Neither thread-id pointer is read.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..27
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @unmergable_3_seq_call - no merging. Three separate
; __kmpc_fork_call sites (targets @.omp_outlined..28, ..29, ..30) remain in a
; single entry block, with a call to @foo between each pair.
; CHECK-LABEL: define {{[^@]+}}@unmergable_3_seq_call
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..28, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void (...) @foo()
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..29, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void (...) @foo()
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..30, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..28 - loads the captured i32 and passes it
; to @use. Neither thread-id pointer is read.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..28
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..29 - loads the captured i32 and passes it
; to @use. Neither thread-id pointer is read.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..29
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..30 - loads the captured i32 and passes it
; to @use. Neither thread-id pointer is read.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..30
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @unmergable_3_proc_bind - no merging. Three separate
; __kmpc_fork_call sites (targets @.omp_outlined..31, ..32, ..33) remain, and
; a __kmpc_push_proc_bind call (final argument 3) sits immediately before the
; second one.
; CHECK-LABEL: define {{[^@]+}}@unmergable_3_proc_bind
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..31, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void @__kmpc_push_proc_bind(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..32, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..33, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..31 - loads the captured i32 and passes it
; to @use. Neither thread-id pointer is read.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..31
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..32 - loads the captured i32 and passes it
; to @use. Neither thread-id pointer is read.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..32
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..33 - loads the captured i32 and passes it
; to @use. Neither thread-id pointer is read.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..33
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @unmergable_3_num_threads - no merging. Three separate
; __kmpc_fork_call sites (targets @.omp_outlined..34, ..35, ..36) remain.
; Before the second one, the value of A is reloaded and passed to
; __kmpc_push_num_threads.
; CHECK-LABEL: define {{[^@]+}}@unmergable_3_num_threads
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..34, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void @__kmpc_push_num_threads(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..35, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..36, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..34 - loads the captured i32 and passes it
; to @use. Neither thread-id pointer is read.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..34
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..35 - loads the captured i32 and passes it
; to @use. Neither thread-id pointer is read.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..35
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..36 - loads the captured i32 and passes it
; to @use. Neither thread-id pointer is read.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..36
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected host-side IR for @merge_2_unmergable_1 - a partial merge. One
; __kmpc_fork_call launches the merged @merge_2_unmergable_1..omp_par. After a
; call to @foo, a separate __kmpc_fork_call still targets @.omp_outlined..39.
; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_2_unmergable_1..omp_par, ptr [[A_ADDR]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    call void (...) @foo()
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..39, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    ret void
; Expected merged parallel body for merge_2_unmergable_1 - the incoming thread
; id is copied into a local alloca, then @.omp_outlined..37 and
; @.omp_outlined..38 are called directly with a __kmpc_barrier between them.
; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..37(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    call void @.omp_outlined..38(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; Expected body of @.omp_outlined..37 - loads the captured i32 and passes it
; to @use. Neither thread-id pointer is read.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..37
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..38 - loads the captured i32 and passes it
; to @use. Neither thread-id pointer is read.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..38
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..39: load the shared i32 and call @use.
; This function is not one of the two callees of the merged region
; @merge_2_unmergable_1..omp_par (those are ..37 and ..38); presumably it
; is the body of the region left unmerged - its call site is not in this
; part of the file, TODO confirm.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..39
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @merge: the argument is spilled to A_ADDR and exactly one
; __kmpc_fork_call is emitted, targeting @merge..omp_par with A_ADDR as its
; single shared argument. The remaining blocks are empty fall-throughs that
; end in ret void.
; CHECK-LABEL: define {{[^@]+}}@merge
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge..omp_par, ptr [[A_ADDR]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    ret void
; Expected body of the merged region @merge..omp_par: the entry block copies
; the thread id into a local alloca, then omp.par.merged calls
; @.omp_outlined., a __kmpc_barrier, and @.omp_outlined..1 in that order.
; CHECK-LABEL: define {{[^@]+}}@merge..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined.(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    call void @.omp_outlined..1(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; Expected body of @.omp_outlined. (first callee of @merge..omp_par):
; load the shared i32 and pass it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..1 (second callee of @merge..omp_par):
; load the shared i32 and pass it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @unmergable_proc_bind: the regions stay separate. A
; __kmpc_push_proc_bind call (constant 3) is followed by two distinct
; __kmpc_fork_call sites, for @.omp_outlined..2 and @.omp_outlined..3, all in
; the entry block; no ..omp_par wrapper is referenced.
; CHECK-LABEL: define {{[^@]+}}@unmergable_proc_bind
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void @__kmpc_push_proc_bind(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..2, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..3, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..2 (first fork target of
; @unmergable_proc_bind): load the shared i32 and call @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..3 (second fork target of
; @unmergable_proc_bind): load the shared i32 and call @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @unmergable_num_threads: the regions stay separate.
; __kmpc_push_num_threads receives the incoming argument A, and two distinct
; __kmpc_fork_call sites (for @.omp_outlined..4 and @.omp_outlined..5)
; follow in the same entry block.
; CHECK-LABEL: define {{[^@]+}}@unmergable_num_threads
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void @__kmpc_push_num_threads(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..4, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..5, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..4 (first fork target of
; @unmergable_num_threads): load the shared i32 and call @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..5 (second fork target of
; @unmergable_num_threads): load the shared i32 and call @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @unmergable_seq_call: the regions stay separate. The call
; to @foo sits between the two __kmpc_fork_call sites (for @.omp_outlined..6
; and @.omp_outlined..7) inside the single entry block.
; CHECK-LABEL: define {{[^@]+}}@unmergable_seq_call
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..6, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void (...) @foo()
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..7, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..6 (fork target before the @foo call in
; @unmergable_seq_call): load the shared i32 and call @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..7 (fork target after the @foo call in
; @unmergable_seq_call): load the shared i32 and call @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..7
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @merge_seq: one __kmpc_fork_call targeting
; @merge_seq..omp_par with A_ADDR as the only shared argument. After the
; region, entry.split.split reloads A_ADDR and passes the value to @use
; (the trailing use(a) from the C source in the file header).
; CHECK-LABEL: define {{[^@]+}}@merge_seq
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_seq..omp_par, ptr [[A_ADDR]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected body of the merged region @merge_seq..omp_par, in control-flow
; order (the blocks are listed below in emission order, not execution order):
;   1. omp.par.merged: call @.omp_outlined..8, __kmpc_barrier, then branch
;      on the result of __kmpc_master.
;   2. omp_region.body -> seq.par.merged: the guarded sequential code
;      (load A_ADDR, add 1, store back), then __kmpc_end_master.
;   3. omp_region.end: a second __kmpc_barrier.
;   4. omp.par.merged.split.split: call @.omp_outlined..9.
; CHECK-LABEL: define {{[^@]+}}@merge_seq..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..8(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK:       omp_region.end:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split:
; CHECK-NEXT:    call void @.omp_outlined..9(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK:       omp_region.body:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; CHECK:       seq.par.merged:
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
; CHECK-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK:       omp.par.merged.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK:       omp_region.body.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    br label [[OMP_REGION_END]]
; Expected body of @.omp_outlined..8 (first callee of @merge_seq..omp_par):
; load the shared i32 and call @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..8
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..9 (second callee of @merge_seq..omp_par):
; load the shared i32 and call @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..9
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @merge_seq_float: one __kmpc_fork_call targeting
; @merge_seq_float..omp_par with two shared arguments, the spilled float
; (F_ADDR) and the incoming pointer P. Nothing but ret void follows the
; region.
; CHECK-LABEL: define {{[^@]+}}@merge_seq_float
; CHECK-SAME: (float [[F:%.*]], ptr nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
; CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_float..omp_par, ptr [[F_ADDR]], ptr [[P]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    ret void
; Expected body of the merged region @merge_seq_float..omp_par, in
; control-flow order (blocks below appear in emission order):
;   1. omp.par.merged: call @.omp_outlined..10, __kmpc_barrier, then branch
;      on the result of __kmpc_master.
;   2. omp_region.body -> seq.par.merged: load the float from F_ADDR, fadd
;      the constant 0x40091EB860000000, store the sum through P, then
;      __kmpc_end_master.
;   3. omp_region.end: a second __kmpc_barrier.
;   4. omp.par.merged.split.split: call @.omp_outlined..11.
; CHECK-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[F_ADDR:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..10(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK:       omp_region.end:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split:
; CHECK-NEXT:    call void @.omp_outlined..11(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK:       omp_region.body:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; CHECK:       seq.par.merged:
; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F_ADDR]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP3]], 0x40091EB860000000
; CHECK-NEXT:    store float [[ADD]], ptr [[P]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK:       omp.par.merged.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK:       omp_region.body.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    br label [[OMP_REGION_END]]
; Expected body of @.omp_outlined..10 (first callee of
; @merge_seq_float..omp_par): load the shared float, convert it to i32 with
; fptosi, and pass the result to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..10
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F]], align 4
; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
; CHECK-NEXT:    call void @use(i32 [[CONV]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..11 (second callee of
; @merge_seq_float..omp_par): load the shared float, convert it to i32 with
; fptosi, and pass the result to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..11
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F]], align 4
; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
; CHECK-NEXT:    call void @use(i32 [[CONV]])
; CHECK-NEXT:    ret void
; Expected IR for @merge_seq_firstprivate: besides A_ADDR, the caller
; allocates an i64 slot (the ..SEQ_OUTPUT_ALLOC alloca) and passes both as
; shared arguments to the single __kmpc_fork_call of
; @merge_seq_firstprivate..omp_par. After the region, A_ADDR is reloaded
; and passed to @use.
; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_firstprivate..omp_par, ptr [[A_ADDR]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected body of the merged region @merge_seq_firstprivate..omp_par, in
; control-flow order (blocks below appear in emission order):
;   1. omp.par.merged: call @.omp_outlined..12, __kmpc_barrier, then branch
;      on the result of __kmpc_master.
;   2. omp_region.body -> seq.par.merged: load A_ADDR, add 1, store back;
;      the sum is also zero-extended to i64 and stored into the
;      ..SEQ_OUTPUT_ALLOC slot received as the fourth argument; then
;      __kmpc_end_master.
;   3. omp_region.end: a second __kmpc_barrier.
;   4. omp.par.merged.split.split: reload the i64 from that slot and pass it
;      by value to @.omp_outlined..13.
; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..12(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK:       omp_region.end:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split:
; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
; CHECK-NEXT:    call void @.omp_outlined..13(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK:       omp_region.body:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; CHECK:       seq.par.merged:
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
; CHECK-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64
; CHECK-NEXT:    store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK:       omp.par.merged.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK:       omp_region.body.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    br label [[OMP_REGION_END]]
; Expected body of @.omp_outlined..12 (first callee of
; @merge_seq_firstprivate..omp_par): load the shared i32 and call @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..12
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..13 (second callee of
; @merge_seq_firstprivate..omp_par): the value arrives by value as an i64,
; is truncated to i32, and is passed to @use; no memory is read.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..13
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32
; CHECK-NEXT:    call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]])
; CHECK-NEXT:    ret void
; Expected IR for @merge_seq_sink_lt: one __kmpc_fork_call targeting
; @merge_seq_sink_lt..omp_par with A_ADDR as the only shared argument. The
; caller allocates only A_ADDR; there is no alloca for the local b here.
; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_seq_sink_lt..omp_par, ptr [[A_ADDR]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    ret void
; Expected body of the merged region @merge_seq_sink_lt..omp_par. The
; alloca for B is in this function's entry block rather than in the caller.
; In control-flow order (blocks below appear in emission order):
;   1. omp.par.merged: call @.omp_outlined..14, __kmpc_barrier, then branch
;      on the result of __kmpc_master.
;   2. omp_region.body -> seq.par.merged: lifetime.start on B, ptrtoint of
;      B truncated to i32 and stored into B, lifetime.end on B, then
;      __kmpc_end_master.
;   3. omp_region.end: a second __kmpc_barrier.
;   4. omp.par.merged.split.split: call @.omp_outlined..15.
; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..14(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK:       omp_region.end:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split:
; CHECK-NEXT:    call void @.omp_outlined..15(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK:       omp_region.body:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; CHECK:       seq.par.merged:
; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull [[B]])
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
; CHECK-NEXT:    store i32 [[TMP5]], ptr [[B]], align 4
; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[B]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK:       omp.par.merged.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK:       omp_region.body.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    br label [[OMP_REGION_END]]
; Expected body of @.omp_outlined..14 (first callee of
; @merge_seq_sink_lt..omp_par): load the shared i32 and call @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..14
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected body of @.omp_outlined..15 (second callee of
; @merge_seq_sink_lt..omp_par): load the shared i32 and call @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..15
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @merge_seq_par_use: the alloca for B stays in the caller.
; A lifetime.start with size -1 on B precedes the single __kmpc_fork_call,
; which passes A_ADDR and B as the two shared arguments to
; @merge_seq_par_use..omp_par; the lifetime.end for B (size 4) follows the
; region just before ret void.
; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 -1, ptr [[B]])
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_par_use..omp_par, ptr [[A_ADDR]], ptr [[B]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[B]])
; CHECK-NEXT:    ret void
; Expected body of the merged region @merge_seq_par_use..omp_par, in
; control-flow order (blocks below appear in emission order):
;   1. omp.par.merged: call @.omp_outlined..16 with A_ADDR, __kmpc_barrier,
;      then branch on the result of __kmpc_master.
;   2. omp_region.body -> seq.par.merged: load A_ADDR, add 1, store the sum
;      into B (the fourth argument), then __kmpc_end_master.
;   3. omp_region.end: a second __kmpc_barrier.
;   4. omp.par.merged.split.split: call @.omp_outlined..17 with both A_ADDR
;      and B.
; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..16(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK:       omp_region.end:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split:
; CHECK-NEXT:    call void @.omp_outlined..17(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK:       omp_region.body:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; CHECK:       seq.par.merged:
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], 1
; CHECK-NEXT:    store i32 [[ADD]], ptr [[B]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK:       omp.par.merged.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK:       omp_region.body.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    br label [[OMP_REGION_END]]
; Expected IR for @.omp_outlined..16: loads an i32 through the captured
; pointer A and passes the loaded value to @use. Both thread-id pointer
; parameters are expected to be marked readnone.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..16
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @.omp_outlined..17: takes two captured pointers (A and B),
; loads an i32 from each in turn and passes each loaded value to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..17
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP1]])
; CHECK-NEXT:    ret void
; Expected IR for @merge_cancellable_regions: both i32 arguments are spilled
; to allocas, and the function contains a single __kmpc_fork_call that targets
; @merge_cancellable_regions..omp_par and forwards the two alloca pointers.
; The remaining blocks are plain fall-through branches ending in `ret void`.
; NOTE(review): the RUN line at the top of this file passes
; --check-prefix=CHECK2, while the directives below use the plain prefix;
; confirm these directives are actually being matched by the test run.
; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions
; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4
; CHECK-NEXT:    store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_cancellable_regions..omp_par, ptr [[CANCEL1_ADDR]], ptr [[CANCEL2_ADDR]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    ret void
; Expected IR for @merge_cancellable_regions..omp_par, the function targeted
; by the fork call in @merge_cancellable_regions. It copies the incoming
; thread id into a local alloca, then in omp.par.merged calls
; @.omp_outlined..18 (with CANCEL1_ADDR) and @.omp_outlined..19 (with
; CANCEL2_ADDR), with a __kmpc_barrier call between the two.
; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[CANCEL1_ADDR:%.*]], ptr [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..18(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    call void @.omp_outlined..19(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; Expected IR for @.omp_outlined..18: loads the i32 behind CANCEL1 and, when
; it is non-zero, loads the global thread id and calls __kmpc_cancel with a
; constant third argument of 1. Both paths return void. The global-tid
; pointer is expected to be readonly here because the if.then path loads it.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..18
; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL1]], align 4
; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
; CHECK-NEXT:    ret void
; CHECK:       if.end:
; CHECK-NEXT:    ret void
; Expected IR for @.omp_outlined..19: same shape as the CANCEL1 variant, but
; keyed on the i32 behind CANCEL2. A non-zero value leads to a __kmpc_cancel
; call using the loaded global thread id; both paths return void.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..19
; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL2]], align 4
; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
; CHECK-NEXT:    ret void
; CHECK:       if.end:
; CHECK-NEXT:    ret void
; Expected IR for @merge_cancellable_regions_seq: both i32 arguments are
; spilled to allocas and a single __kmpc_fork_call targets
; @merge_cancellable_regions_seq..omp_par with the two alloca pointers.
; No code other than fall-through branches and `ret void` follows the call.
; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq
; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4
; CHECK-NEXT:    store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_cancellable_regions_seq..omp_par, ptr [[CANCEL1_ADDR]], ptr [[CANCEL2_ADDR]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    ret void
; Expected IR for @merge_cancellable_regions_seq..omp_par. Shape of the
; expected function:
;   1. call @.omp_outlined..20 (CANCEL1_ADDR), then a __kmpc_barrier;
;   2. a section guarded by __kmpc_master: only when it returns non-zero does
;      control reach seq.par.merged, which stores (CANCEL1 == 0) zero-extended
;      to i32 into CANCEL2_ADDR, followed by __kmpc_end_master;
;   3. omp_region.end issues another __kmpc_barrier;
;   4. call @.omp_outlined..21 (CANCEL2_ADDR).
; Blocks are listed in the order the directives expect them in the output,
; which is not the execution order (the guarded body is listed last).
; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[CANCEL1_ADDR:%.*]], ptr [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..20(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK:       omp_region.end:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split:
; CHECK-NEXT:    call void @.omp_outlined..21(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK:       omp_region.body:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; CHECK:       seq.par.merged:
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[CANCEL1_ADDR]], align 4
; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0
; CHECK-NEXT:    [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32
; CHECK-NEXT:    store i32 [[LNOT_EXT]], ptr [[CANCEL2_ADDR]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK:       omp.par.merged.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK:       omp_region.body.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    br label [[OMP_REGION_END]]
; Expected IR for @.omp_outlined..20: branches on the i32 behind CANCEL1;
; a non-zero value reaches if.then, which loads the global thread id and
; calls __kmpc_cancel with constant 1. Both paths return void.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..20
; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL1]], align 4
; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
; CHECK-NEXT:    ret void
; CHECK:       if.end:
; CHECK-NEXT:    ret void
; Expected IR for @.omp_outlined..21: branches on the i32 behind CANCEL2;
; a non-zero value reaches if.then, which loads the global thread id and
; calls __kmpc_cancel with constant 1. Both paths return void.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..21
; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL2]], align 4
; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
; CHECK-NEXT:    ret void
; CHECK:       if.end:
; CHECK-NEXT:    ret void
; Expected IR for @merge_3: the i32 argument is spilled to A_ADDR and the
; function contains exactly one __kmpc_fork_call, targeting @merge_3..omp_par
; with A_ADDR as its only captured argument.
; CHECK-LABEL: define {{[^@]+}}@merge_3
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_3..omp_par, ptr [[A_ADDR]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    ret void
; Expected IR for @merge_3..omp_par: omp.par.merged calls the three outlined
; bodies @.omp_outlined..22, ..23 and ..24 in sequence, each with A_ADDR, and
; a __kmpc_barrier call is expected after the first and after the second
; (none after the third).
; CHECK-LABEL: define {{[^@]+}}@merge_3..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..22(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    call void @.omp_outlined..23(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    call void @.omp_outlined..24(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; Expected IR for @.omp_outlined..22 (first body called from
; @merge_3..omp_par): loads an i32 through A and passes it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..22
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @.omp_outlined..23 (second body called from
; @merge_3..omp_par): loads an i32 through A and passes it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..23
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @.omp_outlined..24 (third body called from
; @merge_3..omp_par): loads an i32 through A and passes it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..24
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @merge_3_seq: besides A_ADDR, two extra i32 allocas
; (ADD_SEQ_OUTPUT_ALLOC and ADD1_SEQ_OUTPUT_ALLOC) are created and all three
; pointers are forwarded through a single __kmpc_fork_call to
; @merge_3_seq..omp_par. After the call, the value stored in
; ADD1_SEQ_OUTPUT_ALLOC is loaded back and passed to @use.
; CHECK-LABEL: define {{[^@]+}}@merge_3_seq
; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @merge_3_seq..omp_par, ptr [[A_ADDR]], ptr [[ADD_SEQ_OUTPUT_ALLOC]], ptr [[ADD1_SEQ_OUTPUT_ALLOC]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
; CHECK-NEXT:    call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]])
; CHECK-NEXT:    ret void
; Expected IR for @merge_3_seq..omp_par. Shape of the expected function:
;   1. call @.omp_outlined..25, then a __kmpc_barrier;
;   2. first __kmpc_master-guarded section (omp_region.body / seq.par.merged):
;      stores A + 1 into ADD_SEQ_OUTPUT_ALLOC, then __kmpc_end_master;
;   3. omp_region.end: __kmpc_barrier, then call @.omp_outlined..26, then
;      another __kmpc_barrier;
;   4. second __kmpc_master-guarded section (omp_region.body5 /
;      seq.par.merged2): loads A and the value from ADD_SEQ_OUTPUT_ALLOC,
;      adds them, stores the sum into ADD1_SEQ_OUTPUT_ALLOC, then
;      __kmpc_end_master;
;   5. omp_region.end4: __kmpc_barrier, then call @.omp_outlined..27.
; Blocks are listed in the order the directives expect them in the output,
; which is not the execution order: the second guarded body is listed before
; the first, and both come after the exit path.
; CHECK-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[ADD_SEQ_OUTPUT_ALLOC:%.*]], ptr [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..25(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK:       omp_region.end:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split:
; CHECK-NEXT:    call void @.omp_outlined..26(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
; CHECK:       omp_region.end4:
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split.split.split:
; CHECK-NEXT:    call void @.omp_outlined..27(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK:       omp_region.body5:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED2:%.*]]
; CHECK:       seq.par.merged2:
; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[ADD_SEQ_OUTPUT_ALLOC]], align 4
; CHECK-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP5]]
; CHECK-NEXT:    store i32 [[ADD1]], ptr [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
; CHECK:       omp.par.merged.split.split.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY5_SPLIT:%.*]]
; CHECK:       omp_region.body5.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
; CHECK-NEXT:    br label [[OMP_REGION_END4]]
; CHECK:       omp_region.body:
; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; CHECK:       seq.par.merged:
; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
; CHECK-NEXT:    store i32 [[ADD]], ptr [[ADD_SEQ_OUTPUT_ALLOC]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK:       omp.par.merged.split:
; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK:       omp_region.body.split:
; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    br label [[OMP_REGION_END]]
; Expected IR for @.omp_outlined..25 (first body called from
; @merge_3_seq..omp_par): loads an i32 through A and passes it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..25
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @.omp_outlined..26 (second body called from
; @merge_3_seq..omp_par): loads an i32 through A and passes it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..26
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @.omp_outlined..27 (third body called from
; @merge_3_seq..omp_par): loads an i32 through A and passes it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..27
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @unmergable_3_seq_call: three separate __kmpc_fork_call
; calls remain (targets @.omp_outlined..28, ..29 and ..30), with a call to
; @foo between each consecutive pair. No ..omp_par function is referenced.
; CHECK-LABEL: define {{[^@]+}}@unmergable_3_seq_call
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..28, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void (...) @foo()
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..29, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void (...) @foo()
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..30, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    ret void
; Expected IR for @.omp_outlined..28 (target of the first fork call in
; @unmergable_3_seq_call): loads an i32 through A and passes it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..28
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @.omp_outlined..29 (target of the second fork call in
; @unmergable_3_seq_call): loads an i32 through A and passes it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..29
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @.omp_outlined..30 (target of the third fork call in
; @unmergable_3_seq_call): loads an i32 through A and passes it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..30
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @unmergable_3_proc_bind: three separate __kmpc_fork_call
; calls remain (targets @.omp_outlined..31, ..32 and ..33). A
; __kmpc_push_proc_bind call with constant argument 3 sits between the first
; and second fork calls; nothing separates the second and third.
; CHECK-LABEL: define {{[^@]+}}@unmergable_3_proc_bind
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..31, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void @__kmpc_push_proc_bind(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..32, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..33, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    ret void
; Expected IR for @.omp_outlined..31 (target of the first fork call in
; @unmergable_3_proc_bind): loads an i32 through A and passes it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..31
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @.omp_outlined..32 (target of the second fork call in
; @unmergable_3_proc_bind): loads an i32 through A and passes it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..32
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @.omp_outlined..33 (target of the third fork call in
; @unmergable_3_proc_bind): loads an i32 through A and passes it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..33
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @unmergable_3_num_threads: three separate __kmpc_fork_call
; calls remain (targets @.omp_outlined..34, ..35 and ..36). Between the first
; and second, the value stored in A_ADDR is reloaded and passed to
; __kmpc_push_num_threads; nothing separates the second and third.
; CHECK-LABEL: define {{[^@]+}}@unmergable_3_num_threads
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..34, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    call void @__kmpc_push_num_threads(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..35, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..36, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    ret void
; Expected IR for @.omp_outlined..34 (target of the first fork call in
; @unmergable_3_num_threads): loads an i32 through A and passes it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..34
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @.omp_outlined..35 (target of the second fork call in
; @unmergable_3_num_threads): loads an i32 through A and passes it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..35
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @.omp_outlined..36 (target of the third fork call in
; @unmergable_3_num_threads): loads an i32 through A and passes it to @use.
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..36
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; Expected IR for @merge_2_unmergable_1: one __kmpc_fork_call targets
; @merge_2_unmergable_1..omp_par; after it, entry.split.split calls @foo and
; then issues a second, separate __kmpc_fork_call that targets
; @.omp_outlined..39 directly.
; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK:       omp_parallel:
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_2_unmergable_1..omp_par, ptr [[A_ADDR]])
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK:       omp.par.outlined.exit:
; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
; CHECK:       omp.par.exit.split:
; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK:       entry.split.split:
; CHECK-NEXT:    call void (...) @foo()
; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..39, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    ret void
; Expected IR for @merge_2_unmergable_1..omp_par: omp.par.merged calls
; @.omp_outlined..37 and @.omp_outlined..38, each with A_ADDR, with a
; __kmpc_barrier call between the two.
; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par
; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  omp.par.entry:
; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK:       omp.par.outlined.exit.exitStub:
; CHECK-NEXT:    ret void
; CHECK:       omp.par.region:
; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK:       omp.par.merged:
; CHECK-NEXT:    call void @.omp_outlined..37(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK-NEXT:    call void @.omp_outlined..38(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK:       entry.split:
; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK:       omp.par.region.split:
; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK:       omp.par.pre_finalize:
; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..37
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..38
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void
; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..39
; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    call void @use(i32 [[TMP0]])
; CHECK-NEXT:    ret void

; CHECK2-LABEL: define {{[^@]+}}@merge
; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { ptr }, align 8
; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK2:       omp_parallel:
; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[STRUCTARG]], i32 0, i32 0
; CHECK2-NEXT:    store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8
; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge..omp_par, ptr [[STRUCTARG]])
; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK2:       omp.par.exit:
; CHECK2-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK2:       entry.split.split:
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@merge..omp_par
; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK2-NEXT:  omp.par.entry:
; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[TMP0]], i32 0, i32 0
; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8
; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK2-NEXT:    store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK2:       omp.par.region:
; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK2:       omp.par.merged:
; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined.(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[LOADGEP_A_ADDR]])
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..1(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[LOADGEP_A_ADDR]])
; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK2:       entry.split:
; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK2:       omp.par.region.split:
; CHECK2-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK2:       omp.par.pre_finalize:
; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK2:       omp.par.exit.exitStub:
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@unmergable_proc_bind
; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1:[0-9]+]])
; CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK2-NEXT:    call void @__kmpc_push_proc_bind(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef nonnull @.omp_outlined..2, ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A_ADDR]])
; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef nonnull @.omp_outlined..3, ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A_ADDR]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..2
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@unmergable_num_threads
; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
; CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK2-NEXT:    call void @__kmpc_push_num_threads(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef nonnull @.omp_outlined..4, ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A_ADDR]])
; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef nonnull @.omp_outlined..5, ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A_ADDR]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..4
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..5
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@unmergable_seq_call
; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef nonnull @.omp_outlined..6, ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A_ADDR]])
; CHECK2-NEXT:    call void (...) @foo()
; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef nonnull @.omp_outlined..7, ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A_ADDR]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..6
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..7
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@merge_seq
; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { ptr }, align 8
; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK2:       omp_parallel:
; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[STRUCTARG]], i32 0, i32 0
; CHECK2-NEXT:    store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8
; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_seq..omp_par, ptr [[STRUCTARG]])
; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK2:       omp.par.exit:
; CHECK2-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK2:       entry.split.split:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@merge_seq..omp_par
; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK2-NEXT:  omp.par.entry:
; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[TMP0]], i32 0, i32 0
; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8
; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK2-NEXT:    store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK2:       omp.par.region:
; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK2:       omp.par.merged:
; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..8(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[LOADGEP_A_ADDR]])
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK2-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK2:       omp_region.end:
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK2:       omp.par.merged.split.split:
; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..9(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[LOADGEP_A_ADDR]])
; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK2:       entry.split:
; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK2:       omp.par.region.split:
; CHECK2-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK2:       omp.par.pre_finalize:
; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK2:       omp_region.body:
; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; CHECK2:       seq.par.merged:
; CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4
; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], 1
; CHECK2-NEXT:    store i32 [[ADD]], ptr [[LOADGEP_A_ADDR]], align 4
; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK2:       omp.par.merged.split:
; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK2:       omp_region.body.split:
; CHECK2-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT:    br label [[OMP_REGION_END]]
; CHECK2:       omp.par.exit.exitStub:
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..8
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..9
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@merge_seq_float
; CHECK2-SAME: (float [[F:%.*]], ptr nofree writeonly captures(none) [[P:%.*]]) local_unnamed_addr {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8
; CHECK2-NEXT:    [[F_RELOADED:%.*]] = alloca float, align 4
; CHECK2-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
; CHECK2-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    store float [[F]], ptr [[F_RELOADED]], align 4
; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK2:       omp_parallel:
; CHECK2-NEXT:    [[GEP_F_RELOADED:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0
; CHECK2-NEXT:    store ptr [[F_RELOADED]], ptr [[GEP_F_RELOADED]], align 8
; CHECK2-NEXT:    [[GEP_F_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1
; CHECK2-NEXT:    store ptr [[F_ADDR]], ptr [[GEP_F_ADDR]], align 8
; CHECK2-NEXT:    [[GEP_P:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2
; CHECK2-NEXT:    store ptr [[P]], ptr [[GEP_P]], align 8
; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_seq_float..omp_par, ptr [[STRUCTARG]])
; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK2:       omp.par.exit:
; CHECK2-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK2:       entry.split.split:
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK2-NEXT:  omp.par.entry:
; CHECK2-NEXT:    [[GEP_F_RELOADED:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0
; CHECK2-NEXT:    [[LOADGEP_F_RELOADED:%.*]] = load ptr, ptr [[GEP_F_RELOADED]], align 8
; CHECK2-NEXT:    [[GEP_F_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1
; CHECK2-NEXT:    [[LOADGEP_F_ADDR:%.*]] = load ptr, ptr [[GEP_F_ADDR]], align 8
; CHECK2-NEXT:    [[GEP_P:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2
; CHECK2-NEXT:    [[LOADGEP_P:%.*]] = load ptr, ptr [[GEP_P]], align 8
; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK2-NEXT:    store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT:    [[TMP2:%.*]] = load float, ptr [[LOADGEP_F_RELOADED]], align 4
; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK2:       omp.par.region:
; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK2:       omp.par.merged:
; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..10(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[LOADGEP_F_ADDR]])
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK2-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK2:       omp_region.end:
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK2:       omp.par.merged.split.split:
; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..11(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[LOADGEP_F_ADDR]])
; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK2:       entry.split:
; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK2:       omp.par.region.split:
; CHECK2-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK2:       omp.par.pre_finalize:
; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK2:       omp_region.body:
; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; CHECK2:       seq.par.merged:
; CHECK2-NEXT:    [[ADD:%.*]] = fadd float [[TMP2]], 0x40091EB860000000
; CHECK2-NEXT:    store float [[ADD]], ptr [[LOADGEP_P]], align 4
; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK2:       omp.par.merged.split:
; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK2:       omp_region.body.split:
; CHECK2-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT:    br label [[OMP_REGION_END]]
; CHECK2:       omp.par.exit.exitStub:
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..10
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[F:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load float, ptr [[F]], align 4
; CHECK2-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
; CHECK2-NEXT:    call void @use(i32 [[CONV]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..11
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[F:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load float, ptr [[F]], align 4
; CHECK2-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
; CHECK2-NEXT:    call void @use(i32 [[CONV]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@merge_seq_firstprivate
; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { ptr, ptr }, align 8
; CHECK2-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8
; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK2:       omp_parallel:
; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0
; CHECK2-NEXT:    store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8
; CHECK2-NEXT:    [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1
; CHECK2-NEXT:    store ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], ptr [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_seq_firstprivate..omp_par, ptr [[STRUCTARG]])
; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK2:       omp.par.exit:
; CHECK2-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK2:       entry.split.split:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par
; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK2-NEXT:  omp.par.entry:
; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 0
; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8
; CHECK2-NEXT:    [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 1
; CHECK2-NEXT:    [[LOADGEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = load ptr, ptr [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK2-NEXT:    store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK2:       omp.par.region:
; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK2:       omp.par.merged:
; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..12(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[LOADGEP_A_ADDR]])
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK2-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK2:       omp_region.end:
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK2:       omp.par.merged.split.split:
; CHECK2-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, ptr [[LOADGEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..13(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]])
; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK2:       entry.split:
; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK2:       omp.par.region.split:
; CHECK2-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK2:       omp.par.pre_finalize:
; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK2:       omp_region.body:
; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; CHECK2:       seq.par.merged:
; CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4
; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], 1
; CHECK2-NEXT:    store i32 [[ADD]], ptr [[LOADGEP_A_ADDR]], align 4
; CHECK2-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64
; CHECK2-NEXT:    store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], ptr [[LOADGEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK2:       omp.par.merged.split:
; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK2:       omp_region.body.split:
; CHECK2-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT:    br label [[OMP_REGION_END]]
; CHECK2:       omp.par.exit.exitStub:
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..12
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..13
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32
; CHECK2-NEXT:    call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@merge_seq_sink_lt
; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { ptr }, align 8
; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK2:       omp_parallel:
; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[STRUCTARG]], i32 0, i32 0
; CHECK2-NEXT:    store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8
; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_seq_sink_lt..omp_par, ptr [[STRUCTARG]])
; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK2:       omp.par.exit:
; CHECK2-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK2:       entry.split.split:
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par
; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK2-NEXT:  omp.par.entry:
; CHECK2-NEXT:    [[B:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[TMP0]], i32 0, i32 0
; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8
; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK2-NEXT:    store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK2:       omp.par.region:
; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK2:       omp.par.merged:
; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..14(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[LOADGEP_A_ADDR]])
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK2-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK2:       omp_region.end:
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK2:       omp.par.merged.split.split:
; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..15(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[LOADGEP_A_ADDR]])
; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK2:       entry.split:
; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK2:       omp.par.region.split:
; CHECK2-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK2:       omp.par.pre_finalize:
; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK2:       omp_region.body:
; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; CHECK2:       seq.par.merged:
; CHECK2-NEXT:    call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull [[B]])
; CHECK2-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK2-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
; CHECK2-NEXT:    store i32 [[TMP5]], ptr [[B]], align 4
; CHECK2-NEXT:    call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[B]])
; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK2:       omp.par.merged.split:
; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK2:       omp_region.body.split:
; CHECK2-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT:    br label [[OMP_REGION_END]]
; CHECK2:       omp.par.exit.exitStub:
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..14
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..15
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@merge_seq_par_use
; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8
; CHECK2-NEXT:    [[A_RELOADED:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    [[B:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    store i32 [[A]], ptr [[A_RELOADED]], align 4
; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK2:       omp_parallel:
; CHECK2-NEXT:    [[GEP_A_RELOADED:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0
; CHECK2-NEXT:    store ptr [[A_RELOADED]], ptr [[GEP_A_RELOADED]], align 8
; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1
; CHECK2-NEXT:    store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8
; CHECK2-NEXT:    [[GEP_B:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2
; CHECK2-NEXT:    store ptr [[B]], ptr [[GEP_B]], align 8
; CHECK2-NEXT:    call void @llvm.lifetime.start.p0(i64 -1, ptr [[B]])
; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_seq_par_use..omp_par, ptr [[STRUCTARG]])
; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK2:       omp.par.exit:
; CHECK2-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK2:       entry.split.split:
; CHECK2-NEXT:    call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[B]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK2-NEXT:  omp.par.entry:
; CHECK2-NEXT:    [[GEP_A_RELOADED:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0
; CHECK2-NEXT:    [[LOADGEP_A_RELOADED:%.*]] = load ptr, ptr [[GEP_A_RELOADED]], align 8
; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1
; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8
; CHECK2-NEXT:    [[GEP_B:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2
; CHECK2-NEXT:    [[LOADGEP_B:%.*]] = load ptr, ptr [[GEP_B]], align 8
; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK2-NEXT:    store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[LOADGEP_A_RELOADED]], align 4
; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK2:       omp.par.region:
; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK2:       omp.par.merged:
; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..16(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[LOADGEP_A_ADDR]])
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK2-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK2:       omp_region.end:
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK2:       omp.par.merged.split.split:
; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..17(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[LOADGEP_A_ADDR]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[LOADGEP_B]])
; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK2:       entry.split:
; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK2:       omp.par.region.split:
; CHECK2-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK2:       omp.par.pre_finalize:
; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK2:       omp_region.body:
; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; CHECK2:       seq.par.merged:
; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
; CHECK2-NEXT:    store i32 [[ADD]], ptr [[LOADGEP_B]], align 4
; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK2:       omp.par.merged.split:
; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK2:       omp_region.body.split:
; CHECK2-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT:    br label [[OMP_REGION_END]]
; CHECK2:       omp.par.exit.exitStub:
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..16
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..17
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[B:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP1]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions
; CHECK2-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { ptr, ptr }, align 8
; CHECK2-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4
; CHECK2-NEXT:    store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK2:       omp_parallel:
; CHECK2-NEXT:    [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0
; CHECK2-NEXT:    store ptr [[CANCEL1_ADDR]], ptr [[GEP_CANCEL1_ADDR]], align 8
; CHECK2-NEXT:    [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1
; CHECK2-NEXT:    store ptr [[CANCEL2_ADDR]], ptr [[GEP_CANCEL2_ADDR]], align 8
; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_cancellable_regions..omp_par, ptr [[STRUCTARG]])
; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK2:       omp.par.exit:
; CHECK2-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK2:       entry.split.split:
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par
; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK2-NEXT:  omp.par.entry:
; CHECK2-NEXT:    [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 0
; CHECK2-NEXT:    [[LOADGEP_CANCEL1_ADDR:%.*]] = load ptr, ptr [[GEP_CANCEL1_ADDR]], align 8
; CHECK2-NEXT:    [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 1
; CHECK2-NEXT:    [[LOADGEP_CANCEL2_ADDR:%.*]] = load ptr, ptr [[GEP_CANCEL2_ADDR]], align 8
; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK2-NEXT:    store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK2:       omp.par.region:
; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK2:       omp.par.merged:
; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..18(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[LOADGEP_CANCEL1_ADDR]])
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..19(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[LOADGEP_CANCEL2_ADDR]])
; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK2:       entry.split:
; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK2:       omp.par.region.split:
; CHECK2-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK2:       omp.par.pre_finalize:
; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK2:       omp.par.exit.exitStub:
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..18
; CHECK2-SAME: (ptr noalias nofree readonly captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[CANCEL1:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL1]], align 4
; CHECK2-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK2-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK2:       if.then:
; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
; CHECK2-NEXT:    ret void
; CHECK2:       if.end:
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..19
; CHECK2-SAME: (ptr noalias nofree readonly captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[CANCEL2:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL2]], align 4
; CHECK2-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK2-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK2:       if.then:
; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
; CHECK2-NEXT:    ret void
; CHECK2:       if.end:
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq
; CHECK2-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8
; CHECK2-NEXT:    [[CANCEL1_RELOADED:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4
; CHECK2-NEXT:    store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    store i32 [[CANCEL1]], ptr [[CANCEL1_RELOADED]], align 4
; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK2:       omp_parallel:
; CHECK2-NEXT:    [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0
; CHECK2-NEXT:    store ptr [[CANCEL1_RELOADED]], ptr [[GEP_CANCEL1_RELOADED]], align 8
; CHECK2-NEXT:    [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1
; CHECK2-NEXT:    store ptr [[CANCEL1_ADDR]], ptr [[GEP_CANCEL1_ADDR]], align 8
; CHECK2-NEXT:    [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2
; CHECK2-NEXT:    store ptr [[CANCEL2_ADDR]], ptr [[GEP_CANCEL2_ADDR]], align 8
; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_cancellable_regions_seq..omp_par, ptr [[STRUCTARG]])
; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK2:       omp.par.exit:
; CHECK2-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK2:       entry.split.split:
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK2-NEXT:  omp.par.entry:
; CHECK2-NEXT:    [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0
; CHECK2-NEXT:    [[LOADGEP_CANCEL1_RELOADED:%.*]] = load ptr, ptr [[GEP_CANCEL1_RELOADED]], align 8
; CHECK2-NEXT:    [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1
; CHECK2-NEXT:    [[LOADGEP_CANCEL1_ADDR:%.*]] = load ptr, ptr [[GEP_CANCEL1_ADDR]], align 8
; CHECK2-NEXT:    [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2
; CHECK2-NEXT:    [[LOADGEP_CANCEL2_ADDR:%.*]] = load ptr, ptr [[GEP_CANCEL2_ADDR]], align 8
; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK2-NEXT:    store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[LOADGEP_CANCEL1_RELOADED]], align 4
; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK2:       omp.par.region:
; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK2:       omp.par.merged:
; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..20(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[LOADGEP_CANCEL1_ADDR]])
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK2-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK2:       omp_region.end:
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK2:       omp.par.merged.split.split:
; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..21(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[LOADGEP_CANCEL2_ADDR]])
; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK2:       entry.split:
; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK2:       omp.par.region.split:
; CHECK2-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK2:       omp.par.pre_finalize:
; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK2:       omp_region.body:
; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; CHECK2:       seq.par.merged:
; CHECK2-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP2]], 0
; CHECK2-NEXT:    [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32
; CHECK2-NEXT:    store i32 [[LNOT_EXT]], ptr [[LOADGEP_CANCEL2_ADDR]], align 4
; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK2:       omp.par.merged.split:
; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK2:       omp_region.body.split:
; CHECK2-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT:    br label [[OMP_REGION_END]]
; CHECK2:       omp.par.exit.exitStub:
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..20
; CHECK2-SAME: (ptr noalias nofree readonly captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[CANCEL1:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL1]], align 4
; CHECK2-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK2-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK2:       if.then:
; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
; CHECK2-NEXT:    ret void
; CHECK2:       if.end:
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..21
; CHECK2-SAME: (ptr noalias nofree readonly captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[CANCEL2:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL2]], align 4
; CHECK2-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK2-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK2:       if.then:
; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
; CHECK2-NEXT:    ret void
; CHECK2:       if.end:
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@merge_3
; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { ptr }, align 8
; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK2:       omp_parallel:
; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[STRUCTARG]], i32 0, i32 0
; CHECK2-NEXT:    store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8
; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_3..omp_par, ptr [[STRUCTARG]])
; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK2:       omp.par.exit:
; CHECK2-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK2:       entry.split.split:
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@merge_3..omp_par
; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK2-NEXT:  omp.par.entry:
; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[TMP0]], i32 0, i32 0
; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8
; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK2-NEXT:    store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK2:       omp.par.region:
; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK2:       omp.par.merged:
; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..22(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[LOADGEP_A_ADDR]])
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..23(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[LOADGEP_A_ADDR]])
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..24(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[LOADGEP_A_ADDR]])
; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK2:       entry.split:
; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK2:       omp.par.region.split:
; CHECK2-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK2:       omp.par.pre_finalize:
; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK2:       omp.par.exit.exitStub:
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..22
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..23
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..24
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@merge_3_seq
; CHECK2-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr, ptr }, align 8
; CHECK2-NEXT:    [[A_RELOADED:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    store i32 [[A]], ptr [[A_RELOADED]], align 4
; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK2:       omp_parallel:
; CHECK2-NEXT:    [[GEP_A_RELOADED:%.*]] = getelementptr { ptr, ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0
; CHECK2-NEXT:    store ptr [[A_RELOADED]], ptr [[GEP_A_RELOADED]], align 8
; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1
; CHECK2-NEXT:    store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8
; CHECK2-NEXT:    [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { ptr, ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2
; CHECK2-NEXT:    store ptr [[ADD_SEQ_OUTPUT_ALLOC]], ptr [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8
; CHECK2-NEXT:    [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { ptr, ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 3
; CHECK2-NEXT:    store ptr [[ADD1_SEQ_OUTPUT_ALLOC]], ptr [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8
; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_3_seq..omp_par, ptr [[STRUCTARG]])
; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK2:       omp.par.exit:
; CHECK2-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK2:       entry.split.split:
; CHECK2-NEXT:    [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
; CHECK2-NEXT:    call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK2-NEXT:  omp.par.entry:
; CHECK2-NEXT:    [[GEP_A_RELOADED:%.*]] = getelementptr { ptr, ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0
; CHECK2-NEXT:    [[LOADGEP_A_RELOADED:%.*]] = load ptr, ptr [[GEP_A_RELOADED]], align 8
; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1
; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8
; CHECK2-NEXT:    [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { ptr, ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2
; CHECK2-NEXT:    [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = load ptr, ptr [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8
; CHECK2-NEXT:    [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { ptr, ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 3
; CHECK2-NEXT:    [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = load ptr, ptr [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8
; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK2-NEXT:    store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[LOADGEP_A_RELOADED]], align 4
; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK2:       omp.par.region:
; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK2:       omp.par.merged:
; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..25(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[LOADGEP_A_ADDR]])
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK2-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
; CHECK2:       omp_region.end:
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
; CHECK2:       omp.par.merged.split.split:
; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..26(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[LOADGEP_A_ADDR]])
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
; CHECK2-NEXT:    [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
; CHECK2-NEXT:    br i1 [[TMP6]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
; CHECK2:       omp_region.end4:
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]]
; CHECK2:       omp.par.merged.split.split.split.split:
; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..27(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[LOADGEP_A_ADDR]])
; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK2:       entry.split:
; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK2:       omp.par.region.split:
; CHECK2-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK2:       omp.par.pre_finalize:
; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK2:       omp_region.body5:
; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED2:%.*]]
; CHECK2:       seq.par.merged2:
; CHECK2-NEXT:    [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4
; CHECK2-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP2]]
; CHECK2-NEXT:    store i32 [[ADD1]], ptr [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC]], align 4
; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
; CHECK2:       omp.par.merged.split.split.split:
; CHECK2-NEXT:    br label [[OMP_REGION_BODY5_SPLIT:%.*]]
; CHECK2:       omp_region.body5.split:
; CHECK2-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
; CHECK2-NEXT:    br label [[OMP_REGION_END4]]
; CHECK2:       omp_region.body:
; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
; CHECK2:       seq.par.merged:
; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
; CHECK2-NEXT:    store i32 [[ADD]], ptr [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4
; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
; CHECK2:       omp.par.merged.split:
; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
; CHECK2:       omp_region.body.split:
; CHECK2-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT:    br label [[OMP_REGION_END]]
; CHECK2:       omp.par.exit.exitStub:
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..25
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..26
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..27
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@unmergable_3_seq_call
; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef nonnull @.omp_outlined..28, ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A_ADDR]])
; CHECK2-NEXT:    call void (...) @foo()
; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef nonnull @.omp_outlined..29, ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A_ADDR]])
; CHECK2-NEXT:    call void (...) @foo()
; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef nonnull @.omp_outlined..30, ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A_ADDR]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..28
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..29
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..30
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@unmergable_3_proc_bind
; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
; CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef nonnull @.omp_outlined..31, ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A_ADDR]])
; CHECK2-NEXT:    call void @__kmpc_push_proc_bind(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef nonnull @.omp_outlined..32, ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A_ADDR]])
; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef nonnull @.omp_outlined..33, ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A_ADDR]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..31
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..32
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..33
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@unmergable_3_num_threads
; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
; CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef nonnull @.omp_outlined..34, ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A_ADDR]])
; CHECK2-NEXT:    call void @__kmpc_push_num_threads(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef nonnull @.omp_outlined..35, ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A_ADDR]])
; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef nonnull @.omp_outlined..36, ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A_ADDR]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..34
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..35
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..36
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@merge_2_unmergable_1
; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { ptr }, align 8
; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
; CHECK2:       omp_parallel:
; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[STRUCTARG]], i32 0, i32 0
; CHECK2-NEXT:    store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8
; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_2_unmergable_1..omp_par, ptr [[STRUCTARG]])
; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
; CHECK2:       omp.par.exit:
; CHECK2-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK2:       entry.split.split:
; CHECK2-NEXT:    call void (...) @foo()
; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef nonnull @.omp_outlined..39, ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A_ADDR]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par
; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK2-NEXT:  omp.par.entry:
; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[TMP0]], i32 0, i32 0
; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8
; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4
; CHECK2-NEXT:    store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
; CHECK2:       omp.par.region:
; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
; CHECK2:       omp.par.merged:
; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..37(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[LOADGEP_A_ADDR]])
; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]])
; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..38(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[LOADGEP_A_ADDR]])
; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
; CHECK2:       entry.split:
; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
; CHECK2:       omp.par.region.split:
; CHECK2-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
; CHECK2:       omp.par.pre_finalize:
; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
; CHECK2:       omp.par.exit.exitStub:
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..37
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..38
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
;
; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..39
; CHECK2-SAME: (ptr noalias nofree readnone captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A:%.*]]) {
; CHECK2-NEXT:  entry:
; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
; CHECK2-NEXT:    ret void
;
