// CUDA I/O automation / Example: transferring a list of vectors to the device
//
// Source: CAMaaS preliminary wiki
// (wiki page chrome: "Jump to navigation", "Jump to search")
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <vector>

#include "cuda_runtime.h"
#include "cuda_runtime_api.h"

// A counted int array: a length plus a pointer to the elements.
// The struct is copied byte-wise between host and device with cudaMemcpy,
// so `data` is only meaningful in the address space it was allocated in.
struct Vector
{
	// Number of ints stored behind `data` (main() allocates exactly this many).
	std::size_t cData;
	// Pointer to the first element; not owned automatically (no destructor).
	int* data;
};

// One element of a singly linked list; the Vector payload is stored by value.
struct ListNode
{
	// Following node, or nullptr at the end of the list.
	ListNode* next;
	// Payload embedded in the node (not a pointer to a separate Vector).
	Vector node_data;
};

// Header of a singly linked list of ListNode elements.
struct List
{
	// Node count as recorded by the builder (main() stores 3 for its three-node list).
	std::size_t cNode;
	// First node, or nullptr when the list is empty.
	ListNode* head;
};

// Kernel: destroys every node of *pList, front to back.
//
// For each node it calls deleteData() on the payload, `delete`s the node and
// advances pList->head, so pList->head is nullptr when the loop ends.
// pList->cNode is left untouched.
//
// Every thread that runs this kernel executes the whole loop; the only launch
// visible in this file uses <<<1, 1>>>.
// NOTE(review): deleteData() is not defined in the visible part of the file.
// NOTE(review): device-side `delete` presumably requires nodes created with
// device-side `new` - confirm against how callers allocate the list.
__global__ void freeMemoryList(List* pList)
{
	for (ListNode* current = pList->head; pList->head != nullptr; )
	{
		// Remember the successor before the node is destroyed.
		ListNode* const following = current->next;

		deleteData(&current->node_data);
		delete current;

		current = following;
		pList->head = current;
	}
}

// Prints a diagnostic and terminates the program when a CUDA runtime call fails.
// `what` names the operation for the error message.
static void checkCuda(cudaError_t status, const char* what)
{
	if (status != cudaSuccess)
	{
		std::fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
		std::exit(EXIT_FAILURE);
	}
}

// Allocates `bytes` of device memory, records the pointer in `allocations`
// so it can be released with cudaFree later, and returns it.
static void* deviceAlloc(std::size_t bytes, std::vector<void*>& allocations, const char* what)
{
	void* ptr = nullptr;
	checkCuda(cudaMalloc(&ptr, bytes), what);
	allocations.push_back(ptr);
	return ptr;
}

// Deep-copies a host list to the device and returns the device List*.
// Every device allocation made on the way is appended to `allocations`.
static List* uploadList(const List& host_list, std::vector<void*>& allocations)
{
	// Collect the host nodes so they can be uploaded tail-first: each device
	// node then already knows the device address of its successor.
	std::vector<const ListNode*> host_nodes;
	for (const ListNode* node = host_list.head; node != nullptr; node = node->next)
		host_nodes.push_back(node);

	ListNode* dev_next = nullptr;
	for (std::size_t i = host_nodes.size(); i-- > 0; )
	{
		const Vector& host_vec = host_nodes[i]->node_data;

		// Host-side image of the device node: same length, device pointers.
		ListNode staging;
		staging.next = dev_next;
		staging.node_data.cData = host_vec.cData;
		staging.node_data.data = nullptr;

		if (host_vec.cData != 0)
		{
			const std::size_t bytes = sizeof(int) * host_vec.cData;
			staging.node_data.data = static_cast<int*>(
				deviceAlloc(bytes, allocations, "cudaMalloc of node data"));
			checkCuda(cudaMemcpy(staging.node_data.data, host_vec.data, bytes, cudaMemcpyHostToDevice),
				"cudaMemcpy of node data to device");
		}

		ListNode* dev_node = static_cast<ListNode*>(
			deviceAlloc(sizeof(ListNode), allocations, "cudaMalloc of list node"));
		checkCuda(cudaMemcpy(dev_node, &staging, sizeof(ListNode), cudaMemcpyHostToDevice),
			"cudaMemcpy of list node to device");

		dev_next = dev_node;
	}

	// After the loop dev_next is the device address of the first node
	// (or nullptr for an empty list).
	List staging_list;
	staging_list.cNode = host_list.cNode;
	staging_list.head = dev_next;

	List* dev_list = static_cast<List*>(
		deviceAlloc(sizeof(List), allocations, "cudaMalloc of dev_list"));
	checkCuda(cudaMemcpy(dev_list, &staging_list, sizeof(List), cudaMemcpyHostToDevice),
		"cudaMemcpy from host_list to dev_list");
	return dev_list;
}

// Builds a small list of int vectors on the host, deep-copies it to the
// device, runs `kernel` on it and releases all host and device memory.
// Returns 0 on success; any CUDA failure terminates with EXIT_FAILURE.
int main(void)
{
	const std::size_t cNodes = 3;	// number of list nodes
	const std::size_t cSize = 3;	// number of ints in each node's vector

	// Host list, built back to front so that node n holds the values
	// n * cSize ... n * cSize + cSize - 1 in list order.
	List host_list;
	host_list.cNode = cNodes;
	host_list.head = nullptr;
	for (std::size_t n = cNodes; n-- > 0; )
	{
		ListNode* node = new ListNode();
		node->next = host_list.head;
		node->node_data.cData = cSize;
		node->node_data.data = new int[cSize];
		for (std::size_t i = 0; i < cSize; ++i)
			node->node_data.data[i] = static_cast<int>(i + cSize * n);
		host_list.head = node;
	}

	// Device copy of the list.
	std::vector<void*> dev_allocations;
	List* dev_list = uploadList(host_list, dev_allocations);

	// NOTE(review): `kernel` is not defined in the visible part of this file;
	// it is assumed to be declared elsewhere and to take a List*.
	kernel<<<1, 1>>>(dev_list);
	checkCuda(cudaGetLastError(), "kernel launch");
	checkCuda(cudaDeviceSynchronize(), "kernel execution");

	// The device list was allocated with cudaMalloc, so it is released with
	// cudaFree from the host rather than with device-side delete.
	for (void* ptr : dev_allocations)
		checkCuda(cudaFree(ptr), "cudaFree");

	// Release the host list.
	while (host_list.head != nullptr)
	{
		ListNode* next = host_list.head->next;
		delete[] host_list.head->node_data.data;
		delete host_list.head;
		host_list.head = next;
	}

	return 0;
}