118 lines
		
	
	
		
			3.6 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
			
		
		
	
	
			118 lines
		
	
	
		
			3.6 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
| #version 450 core
 | |
| #extension GL_KHR_memory_scope_semantics : enable
 | |
| #extension GL_NV_cooperative_matrix : enable
 | |
| #extension GL_NV_integer_cooperative_matrix : enable
 | |
| #extension GL_EXT_shader_explicit_arithmetic_types : enable
 | |
| #extension GL_EXT_buffer_reference : enable
 | |
| 
 | |
| layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
 | |
| 
 | |
// Dimension constants used by the global cooperative matrices.
// X is a plain constant (8). Y is a specialization constant (constant_id = 0)
// with default 2, so Z = X*Y is derived from a specialization constant and
// evaluates to 16 when Y keeps its default.
| const int X = 8;
 | |
| layout(constant_id = 0) const int Y = 2;
 | |
| const int Z = X*Y;
 | |
| 
 | |
// Global 8-bit cooperative matrices at subgroup scope with Z rows and 8 columns.
// Signed (icoopmatNV) and unsigned (ucoopmatNV) variants are each declared once
// as a single variable and once as a 3-element array. The row count Z is built
// from the specialization constant Y.
| icoopmatNV<8, gl_ScopeSubgroup, Z, 8> miC;
 | |
| icoopmatNV<8, gl_ScopeSubgroup, Z, 8> miC2[3];
 | |
| ucoopmatNV<8, gl_ScopeSubgroup, Z, 8> muC;
 | |
| ucoopmatNV<8, gl_ScopeSubgroup, Z, 8> muC2[3];
 | |
| 
 | |
// Arrays whose sizes are taken from .length() of the global cooperative
// matrices, both directly (miC, muC) and through an array element (miC2[1],
// muC2[1]).
// NOTE(review): uarr and uarr2 are declared int, like iarr/iarr2, despite the
// "u" prefix -- confirm whether that is intentional.
| int iarr[miC.length()];
 | |
| int iarr2[miC2[1].length()];
 | |
| int uarr[muC.length()];
 | |
| int uarr2[muC2[1].length()];
 | |
| 
 | |
// Constant cooperative matrices, each built by the scalar constructor with
// the value 1. mD is 32-bit signed with Z rows and 8 columns; mD2 is 8-bit
// unsigned, 8x8. main() reads mD[1]; mD2 is not referenced in the visible source.
| const icoopmatNV<32, gl_ScopeSubgroup, Z, 8> mD = icoopmatNV<32, gl_ScopeSubgroup, Z, 8>(1);
 | |
| const ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> mD2 = ucoopmatNV<8, gl_ScopeSubgroup, 8, 8>(1);
 | |
| 
 | |
// Plain three-int struct and a constant instance built with the struct
// constructor. Neither S nor s is referenced elsewhere in the visible source.
| struct S { int a; int b; int c; };
 | |
| 
 | |
| const S s = S(12, 23, 34);
 | |
| 
 | |
// Coherent buffer block of uint data declared with the buffer_reference layout
// qualifier: a fixed-size array y (1024*1024 elements) followed by a
// runtime-sized array x. It is accessed in main() through the instance name
// `block`, and the type is also used for the member `b` of Block16.
| layout(set = 0, binding = 0, buffer_reference) coherent buffer Block {
 | |
|     uint y[1024*1024];
 | |
|     uint x[];
 | |
| } block;
 | |
| 
 | |
// Coherent buffer block of int8_t data (instance `block8`) using the same
// set/binding numbers as Block: a fixed-size array y, a runtime-sized array x,
// and a member b of the buffer_reference type Block.
// NOTE(review): the block is named Block16 although its elements are 8-bit and
// its instance is block8 -- presumably a leftover name; confirm.
// NOTE(review): the unsized array x[] is followed by another member (b) --
// confirm this ordering is intended.
| layout(set = 0, binding = 0) coherent buffer Block16 {
 | |
|     int8_t y[1024*1024];
 | |
|     int8_t x[];
 | |
| 
 | |
|     Block b;
 | |
| } block8;
 | |
| 
 | |
// Takes an 8-bit signed 8x8 subgroup-scope cooperative matrix by value and
// returns the result of applying unary minus to it.
| icoopmatNV<8, gl_ScopeSubgroup, 8, 8> ineg(icoopmatNV<8, gl_ScopeSubgroup, 8, 8> m) { return -m; }
 | |
// Takes an 8-bit unsigned 8x8 subgroup-scope cooperative matrix by value and
// returns it multiplied by the scalar uint8_t(2).
| ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> umul(ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> m) { return m * uint8_t(2); }
 | |
| 
 | |
// SC is a specialization constant (constant_id = 2, default 1). It is used
// both as the row and column count of a 32-bit unsigned cooperative matrix type
// and as both dimensions of the array-of-arrays scm. scm is not referenced in
// main() in the visible source.
| layout(constant_id = 2) const int SC = 1;
 | |
| ucoopmatNV<32, gl_ScopeSubgroup, SC, SC> scm[SC][SC];
 | |
| 
 | |
// Shared-memory array used by the last coopMatLoadNV/coopMatStoreNV pair in
// main(). The size expression 16*16*2/16 evaluates to 32 uvec4 elements.
// NOTE(review): the "*2" factor looks like a bytes-per-element term for a
// 16-bit type; for the 8-bit 16x16 matrix named below it gives twice the
// 16*16 bytes needed (uvec4 = four 32-bit uints) -- confirm this is intended.
| // sized for icoopmatNV<8, gl_ScopeSubgroup, 16, 16>
 | |
| shared uvec4 shmatrix[16*16*2/16];
 | |
| 
 | |
// Shader entry point. Runs a sequence of independent integer cooperative
// matrix operations: construction, arithmetic, conversion to float matrices,
// element access, buffer and shared-memory load/store, multiply-add, length
// queries, array-of-matrix access, and calls to ineg()/umul(). The results are
// not written anywhere other than by the coopMatStoreNV calls.
| void main()
 | |
| {
 | |
      // Local 8-bit 16-row matrices built from the scalar 2. The column count is
      // the constant expression (2>1?8:4), which evaluates to 8.
|     ucoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)> mu = ucoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)>(2);
 | |
|     icoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)> mi = icoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)>(2);
 | |
| 
 | |
      // Matrix-matrix add and subtract, unary negation, and matrix-scalar multiply.
|     mu = mu + mu;
 | |
|     mu = mu - mu;
 | |
|     mi = -mi;
 | |
|     mi = mi * int8_t(2);
 | |
| 
 | |
      // Constructor conversions from the unsigned and signed integer matrices to
      // 16-bit and 32-bit float cooperative matrices with the same 16x8 dimensions.
|     fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> mf16_0 = fcoopmatNV<16, gl_ScopeSubgroup, 16, 8>(mu);
 | |
|     fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> mf32_0 = fcoopmatNV<32, gl_ScopeSubgroup, 16, 8>(mu);
 | |
|     fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> mf16_1 = fcoopmatNV<16, gl_ScopeSubgroup, 16, 8>(mi);
 | |
|     fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> mf32_1 = fcoopmatNV<32, gl_ScopeSubgroup, 16, 8>(mi);
 | |
| 
 | |
      // Indexed element read from mu and indexed element write into mi, with an
      // explicit uint8_t -> int8_t conversion.
|     uint8_t x = mu[1];
 | |
|     mi[0] = int8_t(x);
 | |
| 
 | |
      // Load/store through the runtime-sized x arrays: the uint array of `block`,
      // the int8_t array of `block8`, and the uint array reached through the
      // buffer-reference member block8.b. Per GL_NV_cooperative_matrix the trailing
      // arguments are the element offset (16), the stride (128) and the colMajor
      // flag (false).
|     coopMatLoadNV(mi, block.x, 16, 128, false);
 | |
|     coopMatStoreNV(mi, block.x, 16, 128, false);
 | |
|     coopMatLoadNV(mu, block8.x, 16, 128, false);
 | |
|     coopMatStoreNV(mu, block8.x, 16, 128, false);
 | |
|     coopMatLoadNV(mi, block8.b.x, 16, 128, false);
 | |
|     coopMatStoreNV(mi, block8.b.x, 16, 128, false);
 | |
| 
 | |
      // Multiply-add D = A*B + C with A 16x8, B 8x8, and C/D 16x8 unsigned 8-bit
      // matrices. A, B and C are declared without initializers.
|     ucoopmatNV<8, gl_ScopeSubgroup, 16, 8> A;
 | |
|     ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> B;
 | |
|     ucoopmatNV<8, gl_ScopeSubgroup, 16, 8> C;
 | |
|     ucoopmatNV<8, gl_ScopeSubgroup, 16, 8> D;
 | |
|     D = coopMatMulAddNV(A, B, C);
 | |
| 
 | |
      // .length() on a local matrix, stored in an int.
|     int l = D.length();
 | |
| 
 | |
| 
 | |
      // Local array of five matrices; writes component 0 of the fourth one.
|     icoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)> a[5];
 | |
|     a[3][0] = int8_t(1);
 | |
| 
 | |
      // Reads component 1 of the global constant matrix mD.
|     int md1 = mD[1];
 | |
| 
 | |
      // Indexes the result of the compound assignment (mi += mi).
      // NOTE(review): the literal index 1234 exceeds the 16*8 = 128 total elements
      // of mi; presumably only compilation is being exercised here -- confirm.
|     md1 += (mi += mi)[1234];
 | |
| 
 | |
      // Whole-matrix assignment between elements of the global array muC2, then a
      // single-component assignment from the signed matrix miC2[2] into the
      // unsigned matrix muC2[1].
|     muC2[0] = muC2[1];
 | |
|     muC2[1][0] = (miC2[2][0]);
 | |
| 
 | |
      // Same load/store calls as earlier, this time through the fixed-size y arrays.
|     coopMatLoadNV(mi, block.y, 16, 128, false);
 | |
|     coopMatStoreNV(mi, block.y, 16, 128, false);
 | |
|     coopMatLoadNV(mu, block8.y, 16, 128, false);
 | |
|     coopMatStoreNV(mu, block8.y, 16, 128, false);
 | |
| 
 | |
      // 8x8 matrices (declared without initializers) passed by value to the helper
      // functions ineg() and umul(), then used with the /= (matrix) and *= (scalar)
      // compound assignments.
|     icoopmatNV<8, gl_ScopeSubgroup, 8, 8> p1;
 | |
|     ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> p2;
 | |
| 
 | |
|     p1 = ineg(p1);
 | |
|     p2 = umul(p2);
 | |
| 
 | |
|     p1 /= p1;
 | |
|     p2 /= p2;
 | |
| 
 | |
|     p1 *= int8_t(2);
 | |
|     p2 *= uint8_t(4);
 | |
| 
 | |
      // Load/store of a signed 8-bit 16x8 matrix through the shared uvec4 array
      // shmatrix, with element offset 1 and stride 2.
|     icoopmatNV<8, gl_ScopeSubgroup, 16, 8> ms;
 | |
|     coopMatLoadNV(ms, shmatrix, 1, 2, false);
 | |
|     coopMatStoreNV(ms, shmatrix, 1, 2, false);
 | |
| 
 | |
| }
 |