Compute shaders:
    Compute shaders are programs that run on the graphics card, outside of the normal rendering pipeline.
    They can be used for massively parallel GPGPU algorithms, or to accelerate parts of game rendering.
    In order to use them efficiently, an in-depth knowledge of GPU architectures and parallel algorithms is often needed,
    as well as knowledge of DirectCompute, OpenGL Compute, CUDA, or OpenCL.


常用的计算架构有 DirectCompute、OpenGL Compute、CUDA 和 OpenCL。

GPGPU–General-purpose computing on graphics processing units 图形处理器通用计算

Unity的Compute Shader十分接近DirectCompute(微软推出的,随DirectX 11一起发布)。Unity引入的Compute Shader支持以下平台:

Windows and Windows Store, with a DirectX 11 or DirectX 12 graphics API and Shader Model 5.0 GPU
macOS and iOS using Metal graphics API
Android, Linux and Windows platforms with Vulkan API
Modern OpenGL platforms (OpenGL 4.3 on Linux or Windows; OpenGL ES 3.1 on Android). Note that Mac OS X does not support OpenGL 4.3
Modern consoles (Sony PS4 and Microsoft Xbox One)



Compute Shader

        // Implements flocking (boids) behaviour on the GPU.

        // Declares CSMain as the entry-point (kernel) function of this compute shader.
        #pragma kernel CSMain

        // Per-boid state exchanged with the host. The C# GPUBoid struct declares the
        // same fields in the same order (12 floats in total), so the two layouts must
        // be kept in sync.
        struct Boid
        {
            float3 pos;        // current position
            float3 rot;        // direction the boid moves along (pos advances by rot * speed)
            float3 flockPos;   // position of the flock the boid steers towards
            float speed;       // movement speed multiplier
            float nearbyDis;   // radius within which other boids count as neighbours
            float boidsCount;  // total number of boids; stored as float, cast to int by the kernel
        };

        // Data the compute shader operates on; the C# script binds both by name
        // ("boidBuffer" via SetBuffer, "deltaTime" via SetFloat).
        RWStructuredBuffer<Boid> boidBuffer;
        float deltaTime;

        // A dispatch launches thread groups, and every group contains
        // numthreads.x * numthreads.y * numthreads.z threads. Unity's template kernel
        // uses [numthreads(8,8,1)], i.e. 8*8*1 = 64 threads per group; the same count
        // could also be written as [numthreads(64,1,1)].
        //
        // This kernel uses a single thread per group because the host script calls
        // Dispatch(kernel, boidsCount, 1, 1): one group per boid, so id.x is exactly
        // the boid index and never runs past the end of boidBuffer. If the group size
        // is raised, the host must dispatch fewer groups and the kernel needs a bounds
        // check on id.x.
        //
        // NOTE(review): every thread reads the other boids from the same buffer it
        // writes its own result to, so a thread may observe a mix of old and already
        // updated neighbours within one dispatch - confirm this is acceptable.
        [numthreads(1, 1, 1)]
        void CSMain (uint3 id : SV_DispatchThreadID)
        {
            Boid boid = boidBuffer[id.x];

            // Accumulators for the three classic flocking rules.
            float3 separation = float3(0.0, 0.0, 0.0);
            float3 alignment = float3(0.0, 0.0, 0.0);
            float3 cohesion = boid.flockPos;
            float3 tempCohesion = float3(0.0, 0.0, 0.0);

            uint nearbyCount = 0;

            for (int i = 0; i < int(boid.boidsCount); i++)
            {
                // A boid is not its own neighbour.
                if (i != int(id.x))
                {
                    Boid tempBoid = boidBuffer[i];
                    float3 offset = boid.pos - tempBoid.pos;
                    if (length(offset) < boid.nearbyDis)
                    {
                        // Separation: push away from the neighbour.
                        separation += offset;

                        // Alignment: accumulate the neighbour's heading.
                        alignment += tempBoid.rot;

                        // Cohesion: accumulate the neighbour's position.
                        tempCohesion += tempBoid.pos;

                        // Without this the averaging below would never run.
                        nearbyCount++;
                    }
                }
            }

            if (nearbyCount > 0)
            {
                // Must be a floating-point division: "1 / nearbyCount" on a uint is an
                // integer division and yields 0 for every count greater than one.
                float invCount = 1.0 / float(nearbyCount);
                alignment *= invCount;
                tempCohesion *= invCount;
            }

            cohesion += tempCohesion;

            float3 direction = alignment + separation + normalize(cohesion - boid.pos);

            // Turn gradually towards the desired direction, then advance along it.
            boid.rot = lerp(boid.rot, normalize(direction), deltaTime * 4);

            boid.pos += boid.rot * boid.speed * deltaTime;

            boidBuffer[id.x] = boid;
        }


        /// <summary>
        /// CPU-side copy of one boid as uploaded to the compute shader.
        /// The fields mirror the HLSL <c>Boid</c> struct in the same order
        /// (three Vector3 plus three floats = 12 floats), so field order and
        /// types must not be changed independently of the shader.
        /// </summary>
        public struct GPUBoid
        {
            public Vector3 pos, rot, flockPos;
            public float speed, nearbyDis, boidsCount;
        }

        using System.Collections;
        using System.Collections.Generic;
        using UnityEngine;

        /// <summary>
        /// Spawns a flock of boid GameObjects and moves them every frame using the
        /// <c>CSMain</c> kernel of <see cref="cshader"/>: boid data is uploaded to a
        /// compute buffer, the kernel is dispatched, and the results are read back and
        /// applied to the GameObjects' transforms.
        /// </summary>
        public class GPUFlock : MonoBehaviour {

            #region Fields

            // Size in bytes of one GPUBoid: 3 x Vector3 (9 floats) + 3 x float = 12 floats.
            // The buffer stride has to equal the size of the struct declared in the shader.
            private const int BoidStride = 12 * sizeof(float);

            public ComputeShader cshader;

            public GameObject boidPrefab;
            public int boidsCount;
            public float spawnRadius;
            public GameObject[] boidsGo;
            public GPUBoid[] boidsData;
            public float flockSpeed;
            public float nearbyDis;

            private Vector3 targetPos = Vector3.zero;
            private int kernelHandle;

            // Allocated once in Start and released in OnDestroy; compute buffers hold
            // native GPU memory and must be released explicitly.
            private ComputeBuffer boidBuffer;

            #endregion

            #region Methods

            /// <summary>
            /// Creates the boid data and GameObjects, looks up the kernel and
            /// allocates the compute buffer shared with the shader.
            /// </summary>
            void Start()
            {
                this.boidsGo = new GameObject[this.boidsCount];
                this.boidsData = new GPUBoid[this.boidsCount];
                this.kernelHandle = cshader.FindKernel("CSMain");

                for (int i = 0; i < this.boidsCount; i++)
                {
                    this.boidsData[i] = this.CreateBoidData();
                    // At this point rot still holds the Euler angles chosen by CreateBoidData.
                    this.boidsGo[i] = Instantiate(boidPrefab, this.boidsData[i].pos, Quaternion.Euler(this.boidsData[i].rot)) as GameObject;
                    // From here on rot is the forward direction consumed by the shader.
                    this.boidsData[i].rot = this.boidsGo[i].transform.forward;
                }

                // A zero-length ComputeBuffer cannot be created, so an empty flock gets no buffer.
                if (this.boidsData.Length > 0)
                {
                    this.boidBuffer = new ComputeBuffer(this.boidsData.Length, BoidStride);
                }
            }

            /// <summary>
            /// Builds the initial state of one boid: a random position inside the spawn
            /// sphere, a rotation blended 30% towards a random one, and a speed jittered
            /// by +/- 0.5 around <see cref="flockSpeed"/>.
            /// </summary>
            /// <returns>The initialised boid; <c>rot</c> holds Euler angles until Start converts it.</returns>
            GPUBoid CreateBoidData()
            {
                GPUBoid boidData = new GPUBoid();
                Vector3 pos = transform.position + Random.insideUnitSphere * spawnRadius;
                Quaternion rot = Quaternion.Slerp(transform.rotation, Random.rotation, 0.3f);
                boidData.pos = pos;
                // Previously the rotation was computed and then dropped, so every boid
                // spawned with identity rotation.
                boidData.rot = rot.eulerAngles;
                boidData.flockPos = transform.position;
                boidData.boidsCount = this.boidsCount;
                boidData.nearbyDis = this.nearbyDis;
                boidData.speed = this.flockSpeed + Random.Range(-0.5f, 0.5f);

                return boidData;
            }

            /// <summary>
            /// Moves the flock target, runs one simulation step on the GPU and applies
            /// the resulting positions and headings to the boid GameObjects.
            /// </summary>
            void Update()
            {
                // Drift the flock centre along a sine-based path.
                this.targetPos += new Vector3(2f, 5f, 3f);
                this.transform.localPosition += new Vector3(
                    (Mathf.Sin(Mathf.Deg2Rad * this.targetPos.x) * -0.2f),
                    (Mathf.Sin(Mathf.Deg2Rad * this.targetPos.y) * 0.2f),
                    (Mathf.Sin(Mathf.Deg2Rad * this.targetPos.z) * 0.2f)
                );

                // Nothing to simulate when the flock is empty.
                if (this.boidBuffer == null)
                {
                    return;
                }

                for (int i = 0; i < this.boidsData.Length; i++)
                {
                    this.boidsData[i].flockPos = this.transform.position;
                }

                // Unlike regular shaders, a compute shader is unrelated to rendering: no
                // mesh or material is involved, and it is configured and executed
                // entirely from C# as below.
                this.boidBuffer.SetData(this.boidsData);
                cshader.SetBuffer(this.kernelHandle, "boidBuffer", this.boidBuffer);
                cshader.SetFloat("deltaTime", Time.deltaTime);
                // Dispatch one thread group per boid.
                // NOTE(review): assumes the kernel handles exactly one boid per group in x - confirm against its numthreads.
                cshader.Dispatch(this.kernelHandle, this.boidsData.Length, 1, 1);
                // Read the simulated state back; without this the GameObjects never move.
                this.boidBuffer.GetData(this.boidsData);

                for (int i = 0; i < this.boidsData.Length; i++)
                {
                    this.boidsGo[i].transform.localPosition = this.boidsData[i].pos;

                    // LookRotation is undefined for a zero direction, so keep the old rotation then.
                    if (this.boidsData[i].rot != Vector3.zero)
                    {
                        this.boidsGo[i].transform.rotation = Quaternion.LookRotation(this.boidsData[i].rot);
                    }
                }
            }

            /// <summary>
            /// Releases the GPU buffer when the component is destroyed.
            /// </summary>
            void OnDestroy()
            {
                if (this.boidBuffer != null)
                {
                    this.boidBuffer.Release();
                    this.boidBuffer = null;
                }
            }

            #endregion
        }


