Hi,
Today I found that when running an application like sample 20_HugeObjectCount (with object movement and object groups toggled on),
the bottleneck inside URHO3D_PROFILE(ReinsertToOctree) comes from StaticModelGroup::OnWorldBoundingBoxUpdate(), which is called from Drawable::GetWorldBoundingBox().
Maybe the bounding boxes could be updated in multiple threads?
I added the following in Octree.cpp
void UpdateDrawablesBBWork(const WorkItem* item, unsigned threadIndex)
{
Drawable** start = reinterpret_cast(item->start_);
Drawable** end = reinterpret_cast(item->end_);
while (start != end)
{
Drawable* drawable = *start;
if (drawable)
const BoundingBox& box = drawable->GetWorldBoundingBox();
++start;
}
}
...
void Octree::Update(const FrameInfo& frame)
{
    ...
    // Reinsert drawables that have been moved or resized, or that have been newly added to the octree and do not sit inside
    // the proper octant yet
    if (!drawableUpdates_.Empty())
    {
        URHO3D_PROFILE(ReinsertToOctree);

        /*****parallel update world bounding box*****/
        // Split drawableUpdates_ into contiguous slices and let the work queue call GetWorldBoundingBox()
        // on each slice, so the serial reinsertion loop below does not have to recalculate the boxes itself.
        // NOTE(review): this presumes GetWorldBoundingBox() is safe to call concurrently for different
        // drawables (e.g. that it does not lazily update state shared through parent nodes) - confirm.
        WorkQueue* queue = GetSubsystem<WorkQueue>();

        int numWorkItems = queue->GetNumThreads() + 1; // Worker threads + main thread
        // At least one drawable per item, so a short list is not divided into zero-length slices
        int drawablesPerItem = Max(static_cast<int>(drawableUpdates_.Size() / numWorkItems), 1);

        PODVector<Drawable*>::Iterator start = drawableUpdates_.Begin();
        // Create a work item for each thread
        for (int i = 0; i < numWorkItems; ++i)
        {
            SharedPtr<WorkItem> item = queue->GetFreeItem();
            item->priority_ = M_MAX_UNSIGNED;
            item->workFunction_ = UpdateDrawablesBBWork;
            item->aux_ = NULL;

            // The last item takes everything that remains; earlier items take at most drawablesPerItem
            PODVector<Drawable*>::Iterator end = drawableUpdates_.End();
            if (i < numWorkItems - 1 && end - start > drawablesPerItem)
                end = start + drawablesPerItem;

            // The work function receives the slice as raw pointers [start_, end_)
            item->start_ = &(*start);
            item->end_ = &(*end);
            queue->AddWorkItem(item);

            start = end;
        }

        // Wait for the items queued above (priority M_MAX_UNSIGNED) before touching the drawables again
        queue->Complete(M_MAX_UNSIGNED);
        /*****end parallel update world bounding box*****/

        for (PODVector<Drawable*>::Iterator i = drawableUpdates_.Begin(); i != drawableUpdates_.End(); ++i)
        {
            Drawable* drawable = *i;
            drawable->updateQueued_ = false;
            Octant* octant = drawable->GetOctant();
            const BoundingBox& box = drawable->GetWorldBoundingBox();
            ...
        }
The result is satisfying: the frame rate went from 113 FPS to 133 FPS in sample 20_HugeObjectCount (with object movement and object groups toggled on).
I ran some other samples, and they look normal.
But I am unfamiliar with the engine code, so I should ask here whether this modification is valid.
Thanks!