1
0
Fork 0
mirror of https://gitlab.com/niansa/libcrosscoro.git synced 2025-03-06 20:53:32 +01:00
libcrosscoro/inc/coro/task_container.hpp
Josh Baldwin e9b225e42f
io_scheduler inline support (#79)
* io_scheduler inline support

* add debug info for io_scheduler size issue

* move poll info into its own file

* cleanup for feature

* Fix valgrind introduced use after free with inline processing

Running the coroutines inline with event processing caused
a use-after-free bug, detected by valgrind in the inline
tcp server/client benchmark code.  Basically, if an event
and a timeout occurred in the same time period, the
inline processing would resume _inline_ with the event or the
timeout -- if the timeout and event occurred in the same epoll_wait()
function call then the second one's coroutine stackframe would
already be destroyed upon resuming it, so the poll_info->processed
check would be reading already freed memory.

The solution to this was to introduce a vector of coroutine handles
which are appended into on each epoll_wait() iteration of events
and timeouts, and only then after the events and timeouts are
deduplicated are the coroutine handles resumed.

This new vector has elided a malloc in the timeout function, but
there is still a malloc to extract the poll infos from the timeout
multimap data structure.  The vector is also on the class member
list and is only ever cleared, it is possible with a monster set
of timeouts that this vector could grow extremely large, but
I think that is worth the price of not re-allocating it.
2021-04-11 15:07:01 -06:00

273 lines
9.8 KiB
C++

#pragma once
#include "coro/concepts/executor.hpp"
#include "coro/task.hpp"

#include <algorithm>
#include <atomic>
#include <iostream>
#include <list>
#include <mutex>
#include <vector>
namespace coro
{
template<concepts::executor executor_type>
class task_container
{
public:
    using task_position = std::list<std::size_t>::iterator;

    struct options
    {
        /// The number of task spots to reserve space for upon creating the container.
        std::size_t reserve_size{8};
        /// The growth factor for task space in the container when capacity is full.
        /// Values <= 1.0 are tolerated; the container then grows by one slot at a time.
        double growth_factor{2};
    };

    /**
     * @param e Tasks started in the container are scheduled onto this executor.  For tasks created
     *          from a coro::io_scheduler, this would usually be that coro::io_scheduler instance.
     * @param opts Task container options.
     */
    task_container(executor_type& e, const options opts = options{.reserve_size = 8, .growth_factor = 2})
        : m_growth_factor(opts.growth_factor),
          m_executor(e)
    {
        // Clamp to at least one slot: grow() computes the next free position from
        // std::prev(m_task_indexes.end()), which is UB on an empty list, so a
        // zero reserve_size must not leave the index list empty.
        const std::size_t reserve_size = std::max<std::size_t>(1, opts.reserve_size);

        m_tasks.resize(reserve_size);
        for (std::size_t i = 0; i < reserve_size; ++i)
        {
            m_task_indexes.emplace_back(i);
        }
        m_free_pos = m_task_indexes.begin();
    }
    task_container(const task_container&)                    = delete;
    task_container(task_container&&)                         = delete;
    auto operator=(const task_container&) -> task_container& = delete;
    auto operator=(task_container&&) -> task_container&      = delete;
    ~task_container()
    {
        // This will hang the current thread... but if tasks are not complete
        // that's also pretty bad.
        while (!empty())
        {
            garbage_collect();
        }
    }

    enum class garbage_collect_t
    {
        /// Execute garbage collection.
        yes,
        /// Do not execute garbage collection.
        no
    };

    /**
     * Stores a user task and starts its execution on the container's thread pool.
     * @param user_task The scheduled user's task to store in this task container and start its execution.
     * @param cleanup Should the task container run garbage collect at the beginning of this store
     *                call?  Calling at regular intervals will reduce memory usage of completed
     *                tasks and allow for the task container to re-use allocated space.
     */
    auto start(coro::task<void> user_task, garbage_collect_t cleanup = garbage_collect_t::yes) -> void
    {
        m_size.fetch_add(1, std::memory_order::relaxed);

        std::scoped_lock lk{m_mutex};

        if (cleanup == garbage_collect_t::yes)
        {
            gc_internal();
        }

        // Only grow if completely full and attempting to add more.
        if (m_free_pos == m_task_indexes.end())
        {
            m_free_pos = grow();
        }

        // Store the task inside a cleanup task for self deletion.
        auto index     = *m_free_pos;
        m_tasks[index] = make_cleanup_task(std::move(user_task), m_free_pos);

        // Mark the current used slot as used.
        std::advance(m_free_pos, 1);

        // Start executing from the cleanup task to schedule the user's task onto the thread pool.
        m_tasks[index].resume();
    }

    /**
     * Garbage collects any tasks that are marked as deleted.  This frees up space to be re-used by
     * the task container for newly stored tasks.
     * @return The number of tasks that were deleted.
     */
    auto garbage_collect() -> std::size_t
    {
        std::scoped_lock lk{m_mutex};
        return gc_internal();
    }

    /**
     * @return The number of tasks that are awaiting deletion.
     */
    auto delete_task_size() const -> std::size_t
    {
        // A bare std::atomic_thread_fence does not synchronize with anything on its
        // own; reading the vector while another thread mutates it under m_mutex is a
        // data race.  Take the lock for a consistent read.
        std::scoped_lock lk{m_mutex};
        return m_tasks_to_delete.size();
    }

    /**
     * @return True if there are no tasks awaiting deletion.
     */
    auto delete_tasks_empty() const -> bool
    {
        // See delete_task_size(): locking is required for a race-free read.
        std::scoped_lock lk{m_mutex};
        return m_tasks_to_delete.empty();
    }

    /**
     * @return The number of active tasks in the container.
     */
    auto size() const -> std::size_t { return m_size.load(std::memory_order::relaxed); }

    /**
     * @return True if there are no active tasks in the container.
     */
    auto empty() const -> bool { return size() == 0; }

    /**
     * @return The capacity of this task manager before it will need to grow in size.
     */
    auto capacity() const -> std::size_t
    {
        // m_tasks is resized under m_mutex in grow(); lock for a race-free read.
        std::scoped_lock lk{m_mutex};
        return m_tasks.size();
    }

    /**
     * Will continue to garbage collect and yield until all tasks are complete.  This method can be
     * co_await'ed to make it easier to wait for the task container to have all its tasks complete.
     *
     * This does not shut down the task container, but can be used when shutting down, or if your
     * logic requires all the tasks contained within to complete, it is similar to coro::latch.
     */
    auto garbage_collect_and_yield_until_empty() -> coro::task<void>
    {
        while (!empty())
        {
            garbage_collect();
            co_await m_executor.yield();
        }
    }

private:
    /**
     * Grows each task container by the growth factor.
     * @return The position of the free index after growing.
     */
    auto grow() -> task_position
    {
        // Save an iterator at the current last item.
        auto last_pos = std::prev(m_task_indexes.end());

        // Guarantee forward progress even when m_growth_factor <= 1.0: without the
        // clamp no new slots would be added and start() would dereference the end()
        // iterator returned below.
        std::size_t new_size = std::max<std::size_t>(
            static_cast<std::size_t>(m_tasks.size() * m_growth_factor), m_tasks.size() + 1);

        for (std::size_t i = m_tasks.size(); i < new_size; ++i)
        {
            m_task_indexes.emplace_back(i);
        }
        m_tasks.resize(new_size);

        // The free position is the item just after the previous last item.
        return std::next(last_pos);
    }

    /**
     * Internal GC call, expects the public function to lock.
     */
    auto gc_internal() -> std::size_t
    {
        std::size_t deleted{0};
        if (!m_tasks_to_delete.empty())
        {
            for (const auto& pos : m_tasks_to_delete)
            {
                // This doesn't actually 'delete' the task, it'll get overwritten when a
                // new user task claims the free space.  It could be useful to actually
                // delete the tasks so the coroutine stack frames are destroyed.  The advantage
                // of letting a new task replace an old one though is that it's a 1:1 exchange
                // on delete and create, rather than a large pause here to delete all the
                // completed tasks.

                // Put the deleted position at the end of the free indexes list.
                m_task_indexes.splice(m_task_indexes.end(), m_task_indexes, pos);
            }
            deleted = m_tasks_to_delete.size();
            m_tasks_to_delete.clear();
        }
        return deleted;
    }

    /**
     * Encapsulate the users tasks in a cleanup task which marks itself for deletion upon
     * completion.  Simply co_await the users task until its completed and then mark the given
     * position within the task manager as being deletable.  The scheduler's next iteration
     * in its event loop will then free that position up to be re-used.
     *
     * This function will also unconditionally catch all unhandled exceptions by the user's
     * task to prevent the scheduler from throwing exceptions.
     * @param user_task The user's task.
     * @param pos The position where the task data will be stored in the task manager.
     * @return The user's task wrapped in a self cleanup task.
     */
    auto make_cleanup_task(task<void> user_task, task_position pos) -> coro::task<void>
    {
        // Immediately move the task onto the executor.
        co_await m_executor.schedule();

        try
        {
            // Await the users task to complete.
            co_await user_task;
        }
        catch (const std::exception& e)
        {
            // TODO: what would be a good way to report this to the user...?  Catching here is required
            // since the co_await will unwrap the unhandled exception on the task.
            // The user's task should ideally be wrapped in a catch all and handle it themselves, but
            // that cannot be guaranteed.
            std::cerr << "coro::task_container user_task had an unhandled exception e.what()= " << e.what() << "\n";
        }
        catch (...)
        {
            // Don't crash if they throw something that isn't derived from std::exception.
            std::cerr << "coro::task_container user_task had unhandled exception, not derived from std::exception.\n";
        }

        std::scoped_lock lk{m_mutex};
        m_tasks_to_delete.push_back(pos);
        // This has to be done within scope lock to make sure this coroutine task completes before the
        // task container object destructs -- if it was waiting on .empty() to become true.
        m_size.fetch_sub(1, std::memory_order::relaxed);
        co_return;
    }

    /// Mutex for safely mutating the task containers across threads; expected usage is within
    /// thread pools for indeterminate lifetime requests.  Mutable so the const inspection
    /// functions (capacity(), delete_task_size(), ...) can lock it.
    mutable std::mutex m_mutex{};
    /// The number of alive tasks.
    std::atomic<std::size_t> m_size{};
    /// Maintains the lifetime of the tasks until they are completed.
    std::vector<task<void>> m_tasks{};
    /// The full set of indexes into `m_tasks`.
    std::list<std::size_t> m_task_indexes{};
    /// The set of tasks that have completed and need to be deleted.
    std::vector<task_position> m_tasks_to_delete{};
    /// The current free position within the task indexes list.  Anything before
    /// this point is used, itself and anything after is free.
    task_position m_free_pos{};
    /// The amount to grow the containers by when all spaces are taken.
    double m_growth_factor{};
    /// The executor to schedule tasks that have just started.
    executor_type& m_executor;
};
} // namespace coro