; Test that tail duplication leaves the NVPTX barrier call alone while still
; duplicating an ordinary call placed in the same position.
; The llc invocation below passes -tail-dup-size=100 and -enable-tail-merge=0.
; NOTE(review): presumably this makes tail duplication aggressive and keeps
; tail merging from folding duplicated tails back together -- confirm against
; the llc option documentation.
; RUN: llc -O2 -tail-dup-size=100 -enable-tail-merge=0 < %s | FileCheck %s

; Generate code for the 64-bit NVPTX (CUDA) target.
target triple = "nvptx64-nvidia-cuda"

; External function with no arguments and no result; used as the plain,
; non-barrier callee in call_foo.
declare void @foo()
; NVVM barrier intrinsic; the FileCheck directives for call_syncthreads look
; for it in the output as bar.sync.
declare void @llvm.nvvm.barrier0()

; syncthreads shouldn't be duplicated.
;
; L1 and L2 each store to %a and then branch to the shared block L42, which
; holds the barrier call. The directives below require one bar.sync after the
; function header and no further bar.sync before the next positive match.
; CHECK: .func call_syncthreads
; CHECK: bar.sync
; CHECK-NOT: bar.sync
define void @call_syncthreads(i32* %a, i32* %b, i1 %cond, i1 %cond2) nounwind {
  ; Entry: choose which store runs before the barrier. %b is never used.
  br i1 %cond, label %L1, label %L2
  ; This branch follows a terminator, so it forms its own unlabeled block;
  ; no branch in this function targets it.
  br i1 %cond2, label %Ret, label %L1
Ret:
  ret void
L1:
  store i32 0, i32* %a
  br label %L42
L2:
  store i32 1, i32* %a
  br label %L42
L42:
  ; Shared tail reached from both L1 and L2.
  call void @llvm.nvvm.barrier0()
  br label %Ret
}

; Control case: the same control flow as call_syncthreads, but the shared
; block L42 calls the ordinary function @foo instead of the barrier. Here tail
; duplication is expected to happen, so the directives below look for two
; calls in the emitted function.
; CHECK: .func call_foo
; CHECK: call
; CHECK: call
define void @call_foo(i32* %a, i32* %b, i1 %cond, i1 %cond2) nounwind {
  ; Entry: choose which store runs before the call. %b is never used.
  br i1 %cond, label %L1, label %L2
  ; This branch follows a terminator, so it forms its own unlabeled block;
  ; no branch in this function targets it.
  br i1 %cond2, label %Ret, label %L1
Ret:
  ret void
L1:
  store i32 0, i32* %a
  br label %L42
L2:
  store i32 1, i32* %a
  br label %L42
L42:
  ; Shared tail reached from both L1 and L2.
  call void @foo()
  br label %Ret
}