I'm not seeing a performance increase with the Job System. Help
Asked Answered
H

1

0

Whether I use jobs or not, I still get 30~40 fps tops :frowning:

SpawnZombies.cs

// Spawns 10,000 instances of `prefab` at random positions and records each one,
// together with a random vertical speed, in `zombieList`.
void Start ()
{
    const int zombieCount = 10000;

    // Presize the list so it does not reallocate while it is being filled.
    zombieList = new List<Zombie>(zombieCount);
    for (int i = 0; i < zombieCount; i++)
    {
        // Float overloads on purpose: Random.Range(int, int) has an exclusive upper bound,
        // so Range(1, 2) always returned 1 and the positions only landed on whole numbers.
        Transform zombieT = Instantiate(prefab, new Vector3(UnityEngine.Random.Range(-8f, 8f), UnityEngine.Random.Range(-5f, 5f)), Quaternion.identity);
        zombieList.Add(new Zombie
        {
            transform = zombieT,
            moveY = UnityEngine.Random.Range(1f, 2f)
        });
    }
}

// Moves every zombie vertically once per frame and flips its direction when it
// leaves the [-5, 5] band on the y axis. When `useJobs` is set the movement is
// computed by StructTaskJob02; otherwise it is done directly on the transforms.
void Update ()
{
    if (useJobs)
    {
        int count = zombieList.Count;

        // Per-frame scratch buffers handed to the job; disposed at the end of this branch.
        NativeArray<float3> position = new NativeArray<float3>(count, Allocator.TempJob);
        NativeArray<float> moveY = new NativeArray<float>(count, Allocator.TempJob);

        // Copy the managed state into the native buffers (runs on the main thread).
        for (int i = 0; i < count; i++)
        {
            position[i] = zombieList[i].transform.position;
            moveY[i] = zombieList[i].moveY;
        }

        StructTaskJob02 taskJob02 = new StructTaskJob02{
            deltaTime = Time.deltaTime,
            position = position,
            moveY = moveY
        };

        // Schedule in batches of 100 indices and wait for the result right away.
        JobHandle jobHandle = taskJob02.Schedule(count, 100);
        jobHandle.Complete();

        // Copy the results back onto the transforms and the managed list.
        for (int i = 0; i < count; i++)
        {
            zombieList[i].transform.position = position[i];
            zombieList[i].moveY = moveY[i];
        }

        position.Dispose();
        moveY.Dispose();
    }
    else
    {
        foreach (Zombie zombie in zombieList)
        {
            zombie.transform.position += new Vector3(0, zombie.moveY * Time.deltaTime);

            // Above the top bound: force the speed negative so the zombie heads down.
            if (zombie.transform.position.y > 5f)
            {
                zombie.moveY = -math.abs(zombie.moveY);
            }

            // Below the bottom bound: force the speed positive so the zombie heads up.
            if (zombie.transform.position.y < -5f)
            {
                zombie.moveY = +math.abs(zombie.moveY);
            }
        }
    }
}

// Parallel job that advances one element of `position` along y by
// `moveY * deltaTime` and flips the sign of `moveY` once the element has left
// the [-5, 5] band.
[BurstCompile]
public struct StructTaskJob02 : IJobParallelFor
{
    public NativeArray<float3> position;
    public NativeArray<float> moveY;
    [ReadOnly]public float deltaTime;

    // Processes the element at `index`.
    public void Execute(int index)
    {
        float speed = moveY[index];
        float3 moved = position[index] + new float3(0, speed * deltaTime, 0f);
        position[index] = moved;

        if (moved.y > 5f)
        {
            // Past the upper bound: make the speed negative.
            moveY[index] = -math.abs(speed);
        }
        else if (moved.y < -5f)
        {
            // Past the lower bound: make the speed positive.
            moveY[index] = +math.abs(speed);
        }
    }
}

// Per-instance state tracked for each spawned zombie.
public class Zombie
{
    // Transform of the spawned instance; its position is updated every frame.
    public Transform transform;
    // Signed vertical speed; multiplied by the frame's delta time when moving.
    public float moveY;
}
Hemistich answered 21/6, 2023 at 8:35 Comment(0)
P
0

Diagnosing the issue

Vanilla code path (6 ms):

profiler timeline

vanilla / deep profile, hierarchy view:

profiler deep profile hierarchy
As you can see the biggest cpu cost in vanilla code path is zombie.transform.position += new Vector3(); line. But that’s beside the point.

Jobified code path (10 ms):
profiler timeline

A question arises immediately here: why is the jobified code path slower than the old-timey one?

Maybe it’s this job at fault? But no, not at all; it’s so fast it’s hardly even visible without a zoom (it takes less than 0.1 ms spread across multiple cores).
alt text
So, what is the issue here then? To find out we need to enable Deep Profile and switch to Hierarchy view in Profiler window, then look for OurScriptName.Update entry.
Be aware that Deep Profile makes everything execute a lot slower, so absolute time values are no longer useful, but relative time values (%) still are.

And here they are - our offenders in plain sight:

profiler deep profile hierarchy

As you can clearly see here, it’s caused by:

  • NativeArray read & write operations, which are super slow outside Burst-compiled jobs

  • = transform.position

  • transform.position =

  • and a few other minor operations, like casting, enumeration and List reading, all magnified by this huge 10k loop.

Which lines are causing the biggest problem? It’s

position[i] = zombieList[i].transform.position;
and
zombieList[i].transform.position = position[i];

Fixing the issue

TransformAccess to the rescue. It fixes all the problems mentioned above and leaves you with the eternal one: “how to draw 10k moving meshes under 60fps”. Which is a better problem to have (as it’s an arbitrary one).
profiler timeline TransformAccess applied

using System.Collections.Generic;

using UnityEngine;
using UnityEngine.Jobs;

using Unity.Entities;
using Unity.Mathematics;
using Unity.Collections;
using Unity.Jobs;

using Random = UnityEngine.Random;

// Spawns 10,000 prefab instances and moves them every frame with a
// Burst-compiled IJobParallelForTransform, so transform positions are read and
// written inside the job rather than in a loop in Update.
public class TestSystem : MonoBehaviour
{
    [SerializeField] GameObject _prefab = null;
    TransformAccessArray _transformsAccess = default(TransformAccessArray);
    NativeArray<float3> _velocities = default(NativeArray<float3>);

    // Handle of the most recently scheduled MyJob.
    public JobHandle Dependency = default(JobHandle);

    // Instantiates the prefabs and builds the transform/velocity arrays the job works on.
    void Start ()
    {
        int numInstances = 10000;
        var transforms = new Transform[ numInstances ];
        _velocities = new NativeArray<float3>( numInstances , Allocator.Persistent );
        for( int i=0 ; i<numInstances ; i++ )
        {
            GameObject instance = Instantiate(
                _prefab ,
                new Vector3( Random.Range(-8f,8f) , Random.Range(-5f,5f) ) ,
                Quaternion.identity
            );
            // Index i pairs each transform with its velocity entry.
            transforms[i] = instance.transform;
            _velocities[i] = new float3{ y=Random.Range(1f,2f) };
        }
        _transformsAccess = new TransformAccessArray( transforms );
    }

    // Waits for the outstanding job, then releases the native containers.
    void OnDestroy ()
    {
        Dependency.Complete();
        if( _velocities.IsCreated ) _velocities.Dispose();
        if( _transformsAccess.isCreated ) _transformsAccess.Dispose();
    }

    // Completes the job scheduled on the previous frame and schedules the next one.
    void Update ()
    {
        Dependency.Complete();

        var job = new MyJob{
            deltaTime = Time.deltaTime ,
            velocity = _velocities
        };
        Dependency = job.Schedule( _transformsAccess );
    }

    // Advances one transform by its velocity and flips the velocity's sign
    // once the transform has left the [-5, 5] band on the y axis.
    [Unity.Burst.BurstCompile]
    public struct MyJob : IJobParallelForTransform
    {
        public NativeArray<float3> velocity;
        [ReadOnly] public float deltaTime;

        void IJobParallelForTransform.Execute ( int index , TransformAccess transform )
        {
            float3 vel = velocity[index];
            float3 pos = transform.position;
            {
                pos += vel * deltaTime;
                if( pos.y>5f ) vel = -math.abs(vel);
                if( pos.y<-5f ) vel = math.abs(vel);
            }
            transform.position = pos;
            velocity[index] = vel;
        }
    }
}

But wait, there’s more!

10k animated entities + Hybrid Renderer V2, >100 FPS
The same behaviour, but converted to DOTS. 10k entities with Hybrid Renderer V2 enabled gave me 100-200 FPS :T

 using UnityEngine;
 
 using Unity.Entities;
 using Unity.Mathematics;
 using Unity.Transforms;
 
 using Random = UnityEngine.Random;
 
 // Converts the assigned prefab into an entity, adds a Velocity component to it
 // and instantiates 10,000 copies with random positions and vertical speeds.
 public class SpawnZombiesFixedV2 : MonoBehaviour
 {
     [SerializeField] GameObject _prefab = null;

     void Start ()
     {
         var defaultWorld = World.DefaultGameObjectInjectionWorld;
         var entityManager = defaultWorld.EntityManager;

         // The blob asset store is only needed for the duration of the conversion.
         Entity prototype;
         using( var store = new BlobAssetStore() )
         {
             prototype = GameObjectConversionUtility.ConvertGameObjectHierarchy(
                 _prefab ,
                 GameObjectConversionSettings.FromWorld( defaultWorld , store )
             );
         }
         entityManager.AddComponent<Velocity>( prototype );

         int remaining = 10000;
         while( remaining-- > 0 )
         {
             Entity spawned = entityManager.Instantiate( prototype );

             var translation = new Translation{
                 Value = new float3( Random.Range(-8f,8f) , Random.Range(-5f,5f) , 0f )
             };
             entityManager.SetComponentData( spawned , translation );

             var velocity = new Velocity{
                 Value = new float3( 0f , Random.Range(1f,2f) , 0f )
             };
             entityManager.SetComponentData( spawned , velocity );
         }
     }
 }
 
 // Schedules two jobs per update: one adds velocity * delta time to each
 // entity's Translation, the other flips the velocity's sign for entities whose
 // y coordinate is outside the [-5, 5] band.
 public class SpawnZombiesFixedV2System : SystemBase
 {
     protected override void OnUpdate ()
     {
         // Copied to a local so the lambda captures a plain float.
         float dt = Time.DeltaTime;

         Entities
             .WithName("update_positions_job")
             .ForEach( ( ref Translation translation , in Velocity velocity ) =>
             {
                 translation.Value = translation.Value + velocity.Value * dt;
             } )
             .Schedule();

         Entities
             .WithName("update_velocity_job")
             .ForEach( ( ref Velocity velocity , in Translation translation ) =>
             {
                 float height = translation.Value.y;
                 if( height>5f )
                 {
                     velocity.Value = -math.abs(velocity.Value);
                 }
                 else if( height<-5f )
                 {
                     velocity.Value = math.abs(velocity.Value);
                 }
             } )
             .Schedule();
     }
 }
 
 // Component holding an entity's velocity; SpawnZombiesFixedV2System multiplies
 // it by the frame's delta time and adds the result to the entity's Translation.
 public struct Velocity : IComponentData
 {
 	public float3 Value;
 }
Phenanthrene answered 21/11, 2023 at 15:30 Comment(0)

© 2022 - 2024 — McMap. All rights reserved.