Автоматизация ввода-вывода CUDA/Пример передачи списка векторов на хост

Материал из CAMaaS preliminary wiki
Перейти к навигации Перейти к поиску
#include <cstddef>
#include <cstdio>
#include <iostream>
#include "cuda_runtime.h"
#include "cuda_runtime_api.h"

// A counted int array: `data` points to `cData` elements.
// In this file the buffer is allocated with device-side `new[]` in
// generateVector() and released with `delete[]` in deleteData(); main() later
// repoints `data` of its host copy at a host buffer.
// The struct has no constructor or destructor, so it never frees `data` itself.
struct Vector
{
	// Number of elements reachable through `data`.
	std::size_t cData;
	// Element storage (not owned automatically).
	int* data;
};

// Node of a singly linked list; each node carries one Vector by value.
// freeMemoryList() walks `next` until it reads nullptr, so the tail node must
// have `next == nullptr`. generateListNodes() creates nodes with
// `new ListNode()`, whose value-initialization zeroes both members.
struct ListNode
{
	// Following node, or nullptr for the last node.
	ListNode* next;
	// Payload stored in this node.
	Vector node_data;
};

// List header: entry point to the chain of ListNode objects.
// generateListNodes() fills both fields on the device; main() copies the
// header to the host byte-for-byte with cudaMemcpy, so in that host copy
// `head` still holds the address produced by the device-side `new`.
struct List
{
	// Node count recorded by generateListNodes().
	std::size_t cNode;
	// First node of the list; freeMemoryList() treats nullptr as "empty".
	ListNode* head;
};

// Fills x[0..n) with x[i] = (5 + i) * n, i.e. 5*n, 6*n, 7*n, ...
//
// x : destination array with room for at least n ints; a null pointer is
//     tolerated and makes the call a no-op.
// n : number of elements to write (also the multiplier in the formula).
//
// The value is computed in size_t and narrowed to int on store, which is what
// the original expression `5 * n + i * n` did implicitly. The index is size_t
// so it matches the type of `n` (the previous `int` index was compared against
// an unsigned bound and could overflow for very large n).
// Marked __host__ __device__ so the same routine can produce a CPU reference.
__host__ __device__ void fill_vector(int* x, size_t n)
{
	if (x == nullptr)
		return;

	for (size_t i = 0; i < n; ++i)
		x[i] = static_cast<int>((i + 5) * n);
}

// Builds a vector of `vec_size` ints on the device and stores it in
// `*output_vector`.
//
// output_vector : receives the new buffer pointer and its element count.
// vec_size      : number of elements to allocate and fill via fill_vector().
//
// The buffer comes from device-side `new[]`, so it has to be released with
// `delete[]` from device code (deleteData() does that).
// Device code has no exceptions, so a failed allocation shows up as a null
// pointer; in that case the vector is published as empty (nullptr / 0)
// instead of being written through.
__device__ void generateVector(Vector *output_vector, std::size_t vec_size)
{
	int *buffer = new int[vec_size];
	if (buffer == nullptr)
	{
		output_vector->data = nullptr;
		output_vector->cData = 0;
		return;
	}

	fill_vector(buffer, vec_size);
	output_vector->data = buffer;
	output_vector->cData = vec_size;
}

// Builds a three-node list on the device heap; the nodes carry vectors of
// 2, 3 and 4 elements, in that order.
//
// output_list : list header to fill; `head` and `cNode` are overwritten.
//
// Launch layout: main() launches this with <<<1, 1>>>. The kernel has no
// per-thread indexing, so every launched thread would rebuild the list.
//
// Nodes are appended one at a time and `cNode` counts only the nodes that
// were actually linked. If a node allocation fails (null result from the
// device-side `new`), the list is left shorter but well-formed instead of
// dereferencing a null pointer.
__global__ void generateListNodes(List* output_list)
{
	const std::size_t vector_sizes[] = { 2, 3, 4 };
	const std::size_t wanted_nodes = sizeof(vector_sizes) / sizeof(vector_sizes[0]);

	output_list->head = nullptr;
	output_list->cNode = 0;

	// `link` always addresses the pointer that must receive the next node:
	// first the list head, afterwards the `next` field of the last node.
	ListNode** link = &output_list->head;
	for (std::size_t i = 0; i < wanted_nodes; ++i)
	{
		// Value-initialization zeroes `next`, which terminates the list.
		ListNode* node = new ListNode();
		if (node == nullptr)
			break;

		generateVector(&node->node_data, vector_sizes[i]);
		*link = node;
		link = &node->next;
		++output_list->cNode;
	}
}

// Copies the elements of `*input_vector` into the flat array `output`.
//
// input_vector : vector whose `data` buffer is read (cData elements).
// output       : destination with room for at least input_vector->cData ints;
//                the kernel cannot verify that capacity.
//
// Launch layout: main() launches this with <<<1, 1>>>; the loop is serial and
// does not use thread or block indices.
// NOTE(review): this staging copy appears to exist because `data` was
// allocated with device-side `new[]`, which the CUDA Programming Guide says
// cannot be handed to host runtime calls such as cudaMemcpy - confirm.
//
// The index is size_t to match `cData` (the previous `auto i = 0` deduced int
// and was compared against an unsigned bound). A null source or destination
// makes the kernel a no-op.
__global__ void getVectorData(Vector* input_vector, int* output)
{
	const std::size_t count = input_vector->cData;
	const int* source = input_vector->data;
	if (source == nullptr || output == nullptr)
		return;

	for (std::size_t i = 0; i < count; ++i)
		output[i] = source[i];
}

// Copies the Vector header (element count and buffer pointer) of the SECOND
// list node into `*output_vector`. Only the header is copied; the element
// buffer is still the one owned by the list node.
//
// input_list    : list to read.
// output_vector : receives the second node's `node_data`, or an empty vector
//                 (cData == 0, data == nullptr) when the list has fewer than
//                 two nodes. The previous code dereferenced head->next
//                 unconditionally.
//
// Launch layout: main() launches this with <<<1, 1>>>.
__global__ void getListNodeData(List* input_list, Vector* output_vector)
{
	Vector result;
	result.cData = 0;
	result.data = nullptr;

	const ListNode* first = input_list->head;
	if (first != nullptr && first->next != nullptr)
		result = first->next->node_data;

	*output_vector = result;
}

// Releases the element buffer of `*pData` with device-side `delete[]` and
// resets the vector to the empty state.
//
// pData : vector to clear; a null pointer is ignored.
//
// Clearing `data` and `cData` afterwards keeps the header consistent and makes
// a repeated call harmless (`delete[]` on nullptr is a no-op) instead of a
// double free.
__device__ void deleteData(Vector* pData)
{
	if (pData == nullptr)
		return;

	delete [] pData->data;
	pData->data = nullptr;
	pData->cData = 0;
}

// Destroys every node of `*pList` (payload buffer first, then the node) and
// leaves the header describing an empty list.
//
// pList : list header; on return head == nullptr and cNode == 0. The previous
//         version cleared `head` but left `cNode` at its old value.
//
// Launch layout: main() launches this with <<<1, 1>>>. The walk is serial and
// frees memory, so running it in more than one thread would free nodes twice.
__global__ void freeMemoryList(List* pList)
{
	ListNode* node = pList->head;
	while (node != nullptr)
	{
		// Read the successor before the node is destroyed.
		ListNode* following = node->next;
		deleteData(&node->node_data);
		delete node;
		node = following;
	}

	pList->head = nullptr;
	pList->cNode = 0;
}

// Reports a failed CUDA runtime call.
// Prints "<message> (<CUDA error string>)" to stderr when `err` is not
// cudaSuccess. Returns true on success, false on failure.
static bool cudaOk(cudaError_t err, const char* message)
{
	if (err == cudaSuccess)
		return true;

	fprintf(stderr, "%s (%s)\n", message, cudaGetErrorString(err));
	return false;
}

// Checks the kernel launched immediately before this call: first the launch
// itself (cudaGetLastError), then its execution (cudaDeviceSynchronize).
static bool kernelOk(const char* message)
{
	return cudaOk(cudaGetLastError(), message)
		&& cudaOk(cudaDeviceSynchronize(), message);
}

// Demo driver: builds a list of vectors on the device, copies the second
// node's vector to the host and prints it.
//
// Steps:
//   1. allocate a List header on the device and let generateListNodes() fill it;
//   2. copy the header to the host to learn how many nodes were built;
//   3. getListNodeData() copies the second node's Vector header into dev_vector;
//   4. getVectorData() copies the elements into a cudaMalloc'ed staging buffer;
//   5. cudaMemcpy the staging buffer to the host and print it.
//
// Returns 0 on success and 1 if any step failed. Every step is checked and
// all resources are released on both paths. The previous version kept running
// after a failure and leaked three host-side ListNode objects.
int main(void)
{
	List* dev_list = nullptr;
	List host_list = List();
	Vector* dev_vector = nullptr;
	Vector host_vector = Vector();
	int* dev_data = nullptr;
	int* host_data = nullptr;
	bool list_built = false;	// true once the device list may be walked/freed
	bool ok = false;

	// Single-pass block: `break` jumps to the shared cleanup below.
	do
	{
		if (!cudaOk(cudaMalloc((void **)&dev_list, sizeof(List)), "cudaMalloc of dev_list failed!"))
			break;

		generateListNodes <<<1, 1 >>> (dev_list);
		if (!kernelOk("generateListNodes kernel failed!"))
			break;
		list_built = true;

		// Header only: host_list.head is a device address and must not be
		// dereferenced on the host.
		if (!cudaOk(cudaMemcpy(&host_list, dev_list, sizeof(List), cudaMemcpyDeviceToHost), "cudaMemcpy from dev_list to host_list failed!"))
			break;

		if (host_list.cNode < 2)
		{
			fprintf(stderr, "list has %zu node(s), but the second node is required\n", host_list.cNode);
			break;
		}

		if (!cudaOk(cudaMalloc((void **)&dev_vector, sizeof(Vector)), "cudaMalloc of dev_vector failed!"))
			break;

		getListNodeData<<<1, 1>>> (dev_list, dev_vector);
		if (!kernelOk("getListNodeData kernel failed!"))
			break;

		if (!cudaOk(cudaMemcpy(&host_vector, dev_vector, sizeof(Vector), cudaMemcpyDeviceToHost), "cudaMemcpy from dev_vector to host_vector failed!"))
			break;

		const std::size_t host_vector_size = host_vector.cData;

		if (!cudaOk(cudaMalloc((void **)&dev_data, sizeof(int) * host_vector_size), "cudaMalloc of dev_data failed!"))
			break;

		host_data = new int[host_vector_size];

		// Stage the elements in dev_data, then copy that buffer to the host.
		getVectorData<<<1, 1>>> (dev_vector, dev_data);
		if (!kernelOk("getVectorData kernel failed!"))
			break;

		if (!cudaOk(cudaMemcpy(host_data, dev_data, sizeof(int) * host_vector_size, cudaMemcpyDeviceToHost), "cudaMemcpy from dev_data to host_data failed!"))
			break;

		// Replace the device address in the host copy with the host buffer.
		host_vector.data = host_data;

		for (std::size_t i = 0; i < host_vector_size; i++)
		{
			fprintf(stdout, "index = %zu value = %d\n", i, host_vector.data[i]);
		}

		ok = true;
	} while (false);

	// The list nodes were created with device-side new, so they are released
	// by a kernel; this is only safe once generateListNodes() has completed.
	if (list_built)
	{
		freeMemoryList<<<1, 1>>>(dev_list);
		ok = kernelOk("freeMemoryList kernel failed!") && ok;
	}

	// cudaFree(nullptr) is a no-op, so pointers never allocated are fine here.
	ok = cudaOk(cudaFree(dev_vector), "cudaFree of dev_vector failed!") && ok;
	ok = cudaOk(cudaFree(dev_data), "cudaFree of dev_data failed!") && ok;
	ok = cudaOk(cudaFree(dev_list), "cudaFree of dev_list failed!") && ok;
	delete [] host_data;

	return ok ? 0 : 1;
}