Portfolio

Graphics, Templates and Native Arrays ( 2024 )

These are simple C classes written for the preprocessor. For more information about the preprocessor, click here.

Year 2024
Author Kaj Dijkstra
Languages pure C
Frameworks Opengl, X11 and FreeType
Size of Source code 200kb
Size of Binary 93.8kb
Lines of code 9 588

Templates

It is now possible to create templates in the preprocessor. You define a template using the "template" keyword and write its parameters between the "<" and ">" symbols. Every occurrence of a parameter name ("T" in this case) inside the class definition is then replaced with the value you put between the "<" and ">" symbols when you create an instance of the class ("vector" in this case).


// Growable array template. The preprocessor replaces every "T" below with the
// type written between "<" and ">" where the class is instantiated.
template< T >
class vector{

	int capacity 		= 10;

	int total 			= 0;

	// Room for exactly "capacity" ( 10 ) elements; resize() grows it on demand.
	T * items 			= malloc( sizeof( T ) * 10 );

	int length()
	{

		// Number of stored elements, not the allocated capacity.
		return this->total;

	}

	T get( int index )
	{

		// No range check is performed here; the caller must pass 0 <= index < length().
		return this->items[index];

	}

	void set( int index, T item )
	{

		// Writes outside the populated range are silently ignored.
		if ( index >= 0 && index < this->total ){

			this->items[ index ] = item;

		}

	}

	void resize( int capacity )
	{

		// Reallocate the storage so it can hold "capacity" elements.
		T * items = realloc( this->items, sizeof( T ) * capacity );


		this->items 	= items;

		this->capacity 	= capacity;

	}

	void add( T item )
	{

		// Double the storage when it is full, then append at the end.
		if ( this->capacity == this->total ){

			this->resize( this->capacity * 2 );
	
		}

		this->items[ this->total++ ] = item;

	}

	int array_push( T item ) {

		// Same as add(), but returns the new element count.
		this->add( item );
		
		return  this->total;

	}

	void unshift( T item ) {

		// Grow BEFORE shifting: the shift below writes to items[ total ], so a
		// full vector ( total == capacity ) needs more room first.
		if ( this->total >= this->capacity ){

			this->resize( this->capacity * 2 );

		}

		// Move every element one slot to the right, starting from the back so
		// nothing is overwritten before it has been copied.
		for ( int i = this->total - 1; i >= 0; --i ) {

			this->items[ i + 1 ] = this->items[ i ];

		}

		this->items[ 0 ] = item;

		this->total++;

	}

}

Now if you instantiate the above class "vector," you need to add argument values to that instantiation.



	main() {

		// The "< int >" argument on the declared type is what selects T = int for this instance.
		vector< int > * example	= new vector();

	}

The preprocessor will detect that you created a new instance, in this case with the argument value "int" between "<" and ">", and will create an extra version of the class in the preprocessing stage.

vector.h:


	typedef struct vector_int{

		int capacity;

		int total;

		int * items;


	} vector_int;

	int vector_int_length( vector_int * this );

	int vector_int_get( vector_int * this, int index );

	void vector_int_set( vector_int * this, int index,  int item );

	void vector_int_resize( vector_int * this, int capacity );

	void vector_int_add( vector_int * this, int item );

	void vector_int_delete( vector_int * this, int index );

	int vector_int_array_push( vector_int * this, int item );

	void vector_int_unshift( vector_int * this, int item );


vector.c:




int vector_int_length( vector_int * this ) {

	/* The element count is tracked in "total"; "capacity" is only the allocated room. */
	int const count = this->total;

	return count;

}


int vector_int_get( vector_int * this, int index ) {

	/* No range check: the caller must pass an index inside the stored elements. */
	int const * slot = this->items + index;

	return *slot;

}

void vector_int_set( vector_int * this, int index,  int item ) {

	/* Writes outside the populated range [0, total) are silently ignored. */
	if ( index < 0 || index >= this->total ){

		return;

	}

	this->items[ index ] = item;

}

void vector_int_resize( vector_int * this, int capacity ) {

	/* Reallocate the element storage for "capacity" ints and record the new capacity. */
	/* The realloc result is stored without a NULL check. */
	this->items 	= realloc( this->items, sizeof( int ) * capacity );

	this->capacity 	= capacity;

}

void vector_int_add( vector_int * this, int item ) {

	/* Storage is full: double it before appending. */
	if ( this->total == this->capacity ){

		vector_int_resize( this, 2 * this->capacity );

	}

	/* Append at the first free slot, then count the new element. */
	this->items[ this->total ] = item;

	this->total += 1;

}


/*
 * Remove the element at "index", closing the gap by moving every later
 * element one slot to the left. An index outside [0, total) is ignored.
 * The allocated capacity is left unchanged.
 */
void vector_int_delete( vector_int * this, int index ) {

	if ( index < 0 || index >= this->total ){

		return;

	}

	/* Shift front-to-back so each source slot is read before it is overwritten. */
	for ( int i = index; i < this->total - 1; ++i ) {

		this->items[ i ] = this->items[ i + 1 ];

	}

	this->total--;

}

int vector_int_array_push( vector_int * this, int item ) {

	/* Append via vector_int_add, then report the element count after the append. */
	vector_int_add( this, item );

	int const newLength = this->total;

	return newLength;

}

/*
 * Insert "item" at index 0, moving every existing element one slot to the right.
 */
void vector_int_unshift( vector_int * this, int item ) {

	/*
	 * Grow BEFORE shifting: the shift below writes to items[ total ], so a
	 * full vector ( total == capacity ) needs more room first. Checking
	 * "capacity == total" only after incrementing total missed exactly that case.
	 */
	if ( this->total >= this->capacity ){

		vector_int_resize( this,  this->capacity * 2 );

	}

	/* Walk from the back so no element is overwritten before it has been copied. */
	for ( int i = this->total - 1; i >= 0; --i ) {

		this->items[ i + 1 ] = this->items[ i ];

	}

	this->items[ 0 ] = item;

	this->total++;

}

/*
 * Create an empty vector_int by value: capacity 10, no elements.
 * The items pointer is the raw malloc result and is not checked here.
 */
vector_int vector_int_new() {

	vector_int instance;

	instance.capacity =  10;

	instance.total =  0;

	/* Size the buffer from the capacity instead of a fixed 10 MB block. */
	instance.items =  malloc( sizeof( int ) * instance.capacity );

	return instance;

}

/*
 * Allocate an empty vector_int on the heap: capacity 10, no elements.
 * Returns NULL when the struct itself cannot be allocated.
 */
vector_int * vector_int_newPointer() {

	struct vector_int * pointer = malloc( sizeof ( struct vector_int ) );

	/* Do not write through a failed allocation. */
	if ( !pointer ){

		return pointer;

	}

	pointer->capacity =  10;

	pointer->total =  0;

	/* Size the buffer from the capacity instead of a fixed 10 MB block. */
	pointer->items =  malloc( sizeof( int ) * pointer->capacity );

	return pointer;

}


As you can see, all the "T" occurrences are replaced with "int" in this case.



		vector< int > * inputA = new vector();

		// Fill the vector with 0 .. 9.
		for (int i = 0; i < 10; ++i)
		{

			inputA->add( i );

		}

		// Overwrite the first element, then insert another 10 in front of it.
		inputA->set( 0, 10 );

		inputA->unshift( 10 );

		int count		= inputA->length();

		for (int i = 0; i < count; ++i)
		{

			int currentInteger = inputA->get( i );

			printf( "%i\n", currentInteger );

		}

This will be rewritten to:


	/* "new vector()" on a vector< int > becomes a call to the generated constructor. */
	vector_int * inputA = vector_int_newPointer();

	for (int i = 0; i < 10; ++i)
	{

		/* Method calls become free functions with the instance as first argument. */
		vector_int_add( inputA,  i );

	}

	vector_int_set( inputA,  0, 10 );

	vector_int_unshift( inputA,  10 );

	int count		= vector_int_length( inputA );

	for (int i = 0; i < count; ++i)
	{

		int currentInteger = vector_int_get( inputA,  i );

		printf( "%i\n", currentInteger );

	}

10 10 1 2 3 4 5 6 7 8 9

Using this mechanism you can also write vectors based on classes ( vector2, vector3, any other class ) instead of basic types ( int, float, uint ...).



	vector< vector2 > * inputA = new vector();

	for (int i = 0; i < 10; ++i)
	{

		vector2 vector2Instance	= new vector( i, i * 2);

		inputA->add( vector2Instance );

	}

	vector2 example	= new vector( 100, 100 );

	inputA->set( 0, example );

	inputA->unshift( example );

	int count		= inputA->length();

	for (int i = 0; i < count; ++i)
	{

		vector2 vector2Instance = inputA->get( i );

		printf( "x:	%f	y+: %f \n", vector2Instance.x, vector2Instance.y );

	}

x: 100.000000 y: 100.000000 x: 100.000000 y: 100.000000 x: 1.000000 y: 2.000000 x: 2.000000 y: 4.000000 x: 3.000000 y: 6.000000 x: 4.000000 y: 8.000000 x: 5.000000 y: 10.000000 x: 6.000000 y: 12.000000 x: 7.000000 y: 14.000000 x: 8.000000 y: 16.000000 x: 9.000000 y: 18.000000

Compute shader

The OpenGL compute shader can be used to perform high-performance computations and calculations in parallel. It works something like this: you write a little shader program in GLSL, which looks very much like C, and that program is executed on the GPU. You feed some information to it in the form of blocks representing arrays, structs, vectors, ints, etc. You execute the shader program, and it can write values back to a block. You can then use that block ( containing arrays, structs, vectors, ints ) in 2 ways. You can load that block inside another shader program, for example another compute shader, a vertex shader or a pixel shader; this is very fast because the information just stays in your GPU memory. Or you can download the block information from your GPU memory back to your CPU, into your computer memory, and use that information inside your C code.

Blocks and shader storage buffers

Blocks are collections of abstract objects that live in the memory of the graphics card. A block can consist of basic types like ints, floats, vector2, vector3, etc. But you can also create structs and add those to a block, or, even crazier, create an array of structs. These blocks are then used to upload this information from your processor and computer memory to your GPU memory from within your normal C code.

You can also update your block values inside your shader program, then afterwards download that information back to your normal computer memory and access that in your c code.

Binding address

Important to note about blocks is the binding address


layout( std430, binding = 0 ) 

This binding address is important when you want to use blocks across different shader programs and renderPasses. If you create the exact same block with the exact same binding address number in different shader programs, and execute these programs one after the other, then the block will be shared between the two programs. So the output of one program can be used as the input of the next program. This happens automatically when the binding numbers are the same and the block has exactly the same form.

Here is a demonstration of how to use compute shaders with the preprocessor. The compute shader is now a lot more streamlined. As you can see, you can upload the typed array demonstrated before directly into the OpenGL compute program.

"addition.comp"


#version 460

// One work group of 10 x 10 x 1 = 100 invocations: one per array element.
layout ( local_size_x = 10, local_size_y = 10, local_size_z = 1 ) in;

// Input operands.
layout( std430, binding = 0 ) buffer inputBlock {

	vec2 array_a[ 100 ];

	vec2 array_b[ 100 ];

};

// Element-wise sum of the two inputs.
layout( std430, binding = 1 ) buffer outputBlock {

	vec2 array_c[ 100 ];

};

void main() {

	// Flattened index of this invocation inside its work group ( 0 .. 99 ).
	uint slot = gl_LocalInvocationIndex;

	array_c[ slot ] = array_a[ slot ] + array_b[ slot ];

}

"renderPass.compute.c"


// Render pass that builds a program from "addition.comp", fills its inputBlock
// with two vector2 arrays and dispatches it.
class compute extends renderPass{

	struct program 	* 	program;

	int active						= true;

	prepare() {

		// Build a program containing only the compute shader.
		shader * computeShader = new shader( GL_COMPUTE_SHADER );

		computeShader->loadFromFile( "assets/shaders/addition.comp" );


		this->program 			= new program();

		this->program->addShader( computeShader );

		this->program->create();


		// First operand: 100 entries ( i, i ).
		vector< vector2 > * inputA = new vector();

		for (int i = 0; i < 100; ++i)
		{

			vector2 a	= new vector2( i, i );

			inputA->add( a );

		}

		// Second operand: 100 entries ( 0, 10 ).
		vector< vector2 > * inputB = new vector();

		for (int i = 0; i < 100; ++i)
		{

			vector2 a	= new vector2( 0, 10 );

			inputB->add( a );

		}

		block * inputBlock			= this->program->getBlock( "inputBlock" );

		// The vectors' raw item storage is handed over as a float pointer.
		inputBlock->setMemberArray( "array_a[0]", ( float * ) inputA->items );

		inputBlock->setMemberArray( "array_b[0]", ( float * ) inputB->items );

		inputBlock->upload();

	}

	render() {

		this->program->use();

		this->program->bindBlock( "inputBlock");

		this->program->bindBlock( "outputBlock");


		// A single work group; the shader declares 10 x 10 x 1 invocations per group.
		glDispatchCompute( 1, 1, 1 );

		glMemoryBarrier( GL_SHADER_STORAGE_BARRIER_BIT );

	} 

}

The vector2 Arrays are directly inserted into the method "setMemberArray". "inputBlock" is the block name defined above. "array_a[0]" and "array_b[0]" are the block members that are arrays in this case. outputBlock is the name of the other block and is the outputblock for this renderPass, And can be used as input for another renderPass.

And to add the renderPass to the pipeline:


	class opengl {

		setupPipeline() {

			// Create the compute pass and register it with the pipeline.
			compute * computePass 	= new compute();

			this->pipeline->addRenderPass( computePass );

		}

	}

Now in the second renderPass we are going to do 3 things:

Read the value from the previous program by creating the same block with the same binding address number.

Create a new block to store the result.

Perform the actual calculation in the main function. Storing the square value of outputBlock into the outputBlock2 block



// Same block name, layout, binding number ( 1 ) and member as the outputBlock
// declared in "addition.comp".
layout( std430, binding = 1 )  buffer outputBlock {

	vec2 array_c[ 100 ];

};

Now in the C program on the CPU we can read these values and write the result to a new block. Then we are going to download this block array from the GPU back to C, directly into a typed array in C. The only code needed for this is:



	// Look up the result block by name, then fetch its array member as a typed vector.
	block * outputBlock			= this->program->getBlock( "outputBlock2" );

	vector< vector2 > * output		= outputBlock->getMemberArray( "array_d[0]" );



#version 460


//shared float shareData[1024]; // Shared between all threads in work group.

// One work group of 10 x 10 x 1 = 100 invocations: one per array element.
layout ( local_size_x = 10, local_size_y = 10, local_size_z = 1 ) in;



// Result of this pass.
layout( std430, binding = 2 )  buffer outputBlock2 {

	vec2 array_d[ 100 ];

};

// Input of this pass.
layout( std430, binding = 1 )  buffer outputBlock {

	vec2 array_c[ 100 ];

};

void main() {

	uint slot = gl_LocalInvocationIndex;

	// Component-wise square of the input element.
	vec2 value = array_c[ slot ];

	array_d[ slot ] = value * value;

}



// Second compute pass: runs "addition2.comp" once, downloads array_d from
// outputBlock2 and prints every element.
class compute2 extends renderPass{

	struct program 			* 	program;

	// Cleared after the first render() so the pass only runs and prints once.
	int active			= true;


	prepare() {

		printf("\n\n\n		Prepare renderPass Compute 2\n\n\n\n\n");

		shader * computeShader = new shader( GL_COMPUTE_SHADER );

		computeShader->loadFromFile( "assets/shaders/addition2.comp" );


		this->program 			= new program();

		this->program->addShader( computeShader );

		this->program->create();

	}

	render() {

		if( this->active ) {

			this->program->use();

			// bind the outputBlock2 this is the block where the result is written to
			this->program->bindBlock( "outputBlock2" );
			
			// execute the program and perform the calculation
			glDispatchCompute( 1, 1, 1 );

			// Make the shader's buffer writes visible before the result is read back.
			// NOTE(review): GL_BUFFER_UPDATE_BARRIER_BIT assumes getMemberArray reads
			// the buffer with glGetBufferSubData or a mapping -- confirm.
			glMemoryBarrier( GL_SHADER_STORAGE_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT );


			// Get the block that contains our result
			block * outputBlock			= this->program->getBlock( "outputBlock2" );

			// Download the array from the GPU to computer memory into an array of vector2's
			// This is done in one call and the cheapest way of doing this, Because the vector2 array
			// is already aligned and of the same form as the OpenGL vec2 array this is very fast.
			vector< vector2 > * output	= outputBlock->getMemberArray( "array_d[0]" );

			int count					= output->length();

			for (int i = 0; i < count; ++i)
			{

				vector2 currentVector = output->get( i );

				// display the result: one argument per conversion ( %i, %f, %f )
				printf("%i  = %f %f \n", i, currentVector.x, currentVector.y );

			}

			printf("length:	%i\n\n", count);
		
			this->active = false;

		}

	} 

}



	class opengl {

		setupPipeline() {

			compute * computePass 		= new compute();

			compute2 * computePass2 	= new compute2();
			

			// computePass is registered before computePass2, the pass that consumes its output.
			this->pipeline->addRenderPass( computePass );

			this->pipeline->addRenderPass( computePass2 );

		}

	}

Result

You can upload your class arrays like vector2, vector3, int, float, etc. in one step to the compute shader. You can perform different stages / renderPasses and share the result between renderPasses on the GPU. Then you can download your results back to an array of classes ( vector2, vector3, int and float ), also in one step. Uploading and downloading these arrays of classes takes just one OpenGL call and is thus very fast.

In addition to that, you can also share this information anywhere else inside your graphical render pipeline, inside your vertex shader, pixel shader or geometry shader, just by defining the same block with the same binding address. These Shader Storage Buffers are a very powerful way to share, write and read between all your shader programs.

Uniform Interface Blocks

Besides shader storage blocks, you also have uniform blocks. They are defined in almost the same way as a shader storage block, but with one difference: you write uniform instead of buffer. You can't write information back to a uniform block like you can with a shader storage block, and uniform blocks are limited in size. But uniform blocks are supposed to be more efficient than shader storage blocks.



// "events" declared as a shader storage block ( "buffer" keyword ).
layout ( packed, binding = 0 ) buffer events
{

	vec2 mouse;

	vec2 window;

};



// The same "events" block declared as a uniform block ( "uniform" keyword ).
layout ( packed, binding = 0 ) uniform events
{

	vec2 mouse;

	vec2 window;

};

// NOTE: quad, meshArray and meshIndex are not declared in this excerpt.
void main() {

	// currentMesh is not used again in the lines shown here.
	quad currentMesh	= meshArray[ meshIndex ];

	// Mouse position as a fraction of the window size.
	vec2 mousePosition	= mouse / window;

	// Re-center around 0, flip the y axis, then scale by 2.
	mousePosition 		-= .5;

	mousePosition.y		*= -1.;

	mousePosition 		*= 2.0;

	// Added to whatever gl_Position.xy already holds; no earlier write is visible in this excerpt.
	gl_Position.xy += mousePosition;

	gl_Position.w = 1.0;

}


You can manipulate uniform blocks with exactly the same methods and objects that you use to manipulate shader storage blocks. To C they are all just "blocks". You can call the "getBlock" method on the "program" object to get the uniform or storage block, and then set or get the members of that block using "setMember", "setMemberArray" or "getMember".



	render() {

		// Look up the "events" block on the current program by name.
		block * eventsBlock 	= currentProgram->getBlock( "events" );

		// Set both members first, then send the block with one upload() call.
		eventsBlock->setMember( "window", currentEvent->screen->size );

		eventsBlock->setMember( "mouse", currentEvent->mouse->position );

		eventsBlock->upload();

	}