From 57127a1c37cf47d43664dd3b50d82e6434fb32d9 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 28 Apr 2026 14:06:48 +0200 Subject: [PATCH 1/4] fix: fix bug in buffer capacity setter --- include/hllm/configuration/edge.hpp | 79 +++++++++++++++-------------- 1 file changed, 40 insertions(+), 39 deletions(-) diff --git a/include/hllm/configuration/edge.hpp b/include/hllm/configuration/edge.hpp index 615ab9e..bfbf688 100644 --- a/include/hllm/configuration/edge.hpp +++ b/include/hllm/configuration/edge.hpp @@ -15,38 +15,39 @@ class Edge { public: - #define __HLLM_PARTITION_DEFAULT_BUFFER_CAPACITY 1 +#define __HLLM_PARTITION_DEFAULT_BUFFER_CAPACITY 1 typedef uint64_t edgeIndex_t; - Edge(const nlohmann::json& js) { deserialize(js); } - Edge(const std::string& name, const size_t bufferCapacity, const size_t bufferSize = 0) - : _name(name), - _bufferCapacity(bufferCapacity), - _bufferSize(bufferSize) {} + Edge(const nlohmann::json &js) { deserialize(js); } + Edge(const std::string &name, const size_t bufferCapacity, const size_t bufferSize = 0) + : _name(name), + _bufferCapacity(bufferCapacity), + _bufferSize(bufferSize) + {} virtual ~Edge() = default; - __INLINE__ void setName(const std::string& name) { _name = name; } + __INLINE__ void setName(const std::string &name) { _name = name; } [[nodiscard]] __INLINE__ std::string getName() const { return _name; } [[nodiscard]] __INLINE__ std::string getProducer() const { return _producer; } [[nodiscard]] __INLINE__ std::string getConsumer() const { return _consumer; } - [[nodiscard]] __INLINE__ size_t getBufferCapacity() const { return _bufferCapacity; } - [[nodiscard]] __INLINE__ size_t getBufferSize() const { return _bufferSize; } - + [[nodiscard]] __INLINE__ size_t getBufferCapacity() const { return _bufferCapacity; } + [[nodiscard]] __INLINE__ size_t getBufferSize() const { return _bufferSize; } + [[nodiscard]] __INLINE__ nlohmann::json serialize() const { nlohmann::json js; - js["Name"] = _name; - js["Producer"] = _producer; - js["Consumer"] = _consumer; + js["Name"] = _name; + js["Producer"] = _producer; + js["Consumer"] = _consumer; js["Buffer Capacity"] = _bufferCapacity; - js["Buffer Size"] = _bufferSize; + js["Buffer Size"] = _bufferSize; return js; } - __INLINE__ void deserialize (const nlohmann::json& js) + __INLINE__ void deserialize(const nlohmann::json &js) { _name = hicr::json::getString(js, "Name"); if (js.contains("Producer")) _producer = hicr::json::getString(js, "Producer"); @@ -56,51 +57,51 @@ class Edge } // Functions to set the HiCR elements required for the creation of edge channels - __INLINE__ void setPayloadCommunicationManager(HiCR::CommunicationManager* const communicationManager) { _payloadCommunicationManager = communicationManager; } - __INLINE__ void setPayloadMemoryManager(HiCR::MemoryManager* const memoryManager) { _payloadMemoryManager = memoryManager; } + __INLINE__ void setPayloadCommunicationManager(HiCR::CommunicationManager *const communicationManager) { _payloadCommunicationManager = communicationManager; } + __INLINE__ void setPayloadMemoryManager(HiCR::MemoryManager *const memoryManager) { _payloadMemoryManager = memoryManager; } __INLINE__ void setPayloadMemorySpace(const std::shared_ptr memorySpace) { _payloadMemorySpace = memorySpace; } - __INLINE__ void setCoordinationCommunicationManager(HiCR::CommunicationManager* const communicationManager) { _coordinationCommunicationManager = communicationManager; } - __INLINE__ void setCoordinationMemoryManager(HiCR::MemoryManager* const memoryManager) { _coordinationMemoryManager = memoryManager; } + __INLINE__ void setCoordinationCommunicationManager(HiCR::CommunicationManager *const communicationManager) { _coordinationCommunicationManager = communicationManager; } + __INLINE__ void setCoordinationMemoryManager(HiCR::MemoryManager *const memoryManager) { _coordinationMemoryManager = memoryManager; } __INLINE__ void setCoordinationMemorySpace(const std::shared_ptr memorySpace) { _coordinationMemorySpace = memorySpace; } - __INLINE__ void setProducer(const std::string& partition) { _producer = partition; } - __INLINE__ void setConsumer(const std::string& partition) { _consumer = partition; } + __INLINE__ void setProducer(const std::string &partition) { _producer = partition; } + __INLINE__ void setConsumer(const std::string &partition) { _consumer = partition; } __INLINE__ void setPromptEdge(const bool isPromptEdge) { _isPromptEdge = isPromptEdge; } __INLINE__ void setResultEdge(const bool isResultEdge) { _isResultEdge = isResultEdge; } - __INLINE__ void setBufferCapacity(const size_t bufferCapacity) { _bufferCapacity = _bufferCapacity; } + __INLINE__ void setBufferCapacity(const size_t bufferCapacity) { _bufferCapacity = bufferCapacity; } __INLINE__ void setBufferSize(const size_t bufferSize) { _bufferSize = bufferSize; } - // Functions to set the HiCR elements required for the creation of edge channels - [[nodiscard]] __INLINE__ HiCR::CommunicationManager* getPayloadCommunicationManager () const { return _payloadCommunicationManager; } - [[nodiscard]] __INLINE__ HiCR::MemoryManager* getPayloadMemoryManager () const { return _payloadMemoryManager; } - [[nodiscard]] __INLINE__ std::shared_ptr getPayloadMemorySpace () const { return _payloadMemorySpace; } - [[nodiscard]] __INLINE__ HiCR::CommunicationManager* getCoordinationCommunicationManager() const { return _coordinationCommunicationManager; } - [[nodiscard]] __INLINE__ HiCR::MemoryManager* getCoordinationMemoryManager () const { return _coordinationMemoryManager; } - [[nodiscard]] __INLINE__ std::shared_ptr getCoordinationMemorySpace () const { return _coordinationMemorySpace; } - [[nodiscard]] __INLINE__ bool isPromptEdge() const { return _isPromptEdge; } - [[nodiscard]] __INLINE__ bool isResultEdge() const { return _isResultEdge; } + // Functions to set the HiCR elements required for the creation of edge channels + [[nodiscard]] __INLINE__ HiCR::CommunicationManager *getPayloadCommunicationManager() const { return _payloadCommunicationManager; } + [[nodiscard]] __INLINE__ HiCR::MemoryManager *getPayloadMemoryManager() const { return _payloadMemoryManager; } + [[nodiscard]] __INLINE__ std::shared_ptr getPayloadMemorySpace() const { return _payloadMemorySpace; } + [[nodiscard]] __INLINE__ HiCR::CommunicationManager *getCoordinationCommunicationManager() const { return _coordinationCommunicationManager; } + [[nodiscard]] __INLINE__ HiCR::MemoryManager *getCoordinationMemoryManager() const { return _coordinationMemoryManager; } + [[nodiscard]] __INLINE__ std::shared_ptr getCoordinationMemorySpace() const { return _coordinationMemorySpace; } + [[nodiscard]] __INLINE__ bool isPromptEdge() const { return _isPromptEdge; } + [[nodiscard]] __INLINE__ bool isResultEdge() const { return _isResultEdge; } private: std::string _name; std::string _producer; std::string _consumer; - size_t _bufferCapacity = __HLLM_PARTITION_DEFAULT_BUFFER_CAPACITY; - size_t _bufferSize; + size_t _bufferCapacity = __HLLM_PARTITION_DEFAULT_BUFFER_CAPACITY; + size_t _bufferSize; // This flag serves to indicate whether these are edges that connect to the request manager bool _isPromptEdge = false; bool _isResultEdge = false; // HiCR-specific objects to create the payload buffers. These are to be set at runtime - HiCR::CommunicationManager* _payloadCommunicationManager = nullptr; - HiCR::MemoryManager* _payloadMemoryManager = nullptr; - std::shared_ptr _payloadMemorySpace = nullptr; + HiCR::CommunicationManager *_payloadCommunicationManager = nullptr; + HiCR::MemoryManager *_payloadMemoryManager = nullptr; + std::shared_ptr _payloadMemorySpace = nullptr; // HiCR-specific objects to create the coordination buffers. These are to be set at runtime - HiCR::CommunicationManager* _coordinationCommunicationManager = nullptr; - HiCR::MemoryManager* _coordinationMemoryManager = nullptr; - std::shared_ptr _coordinationMemorySpace = nullptr; + HiCR::CommunicationManager *_coordinationCommunicationManager = nullptr; + HiCR::MemoryManager *_coordinationMemoryManager = nullptr; + std::shared_ptr _coordinationMemorySpace = nullptr; }; // class Base From 13c77197cbeb5aa3755ce770aeab8c95235c2768 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 28 Apr 2026 14:06:56 +0200 Subject: [PATCH 2/4] examples: fix launch command --- examples/basic/meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/basic/meson.build b/examples/basic/meson.build index 4b64f6e..b186849 100644 --- a/examples/basic/meson.build +++ b/examples/basic/meson.build @@ -8,7 +8,7 @@ if get_option('buildTests') args: [ '-np', '1', '--oversubscribe', e.full_path(), - meson.current_source_dir() + '/config.json', + meson.current_source_dir() + '/policy.json', ], timeout: 60, suite: testSuite, From 82d941cadb9ae981b09c93e0dd13552f273ae4a2 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 28 Apr 2026 14:07:01 +0200 Subject: [PATCH 3/4] style: format files --- examples/basic/basic.cpp | 104 +++--- include/hllm/configuration/deployment.hpp | 230 +++++++------ include/hllm/configuration/partition.hpp | 52 +-- include/hllm/configuration/replica.hpp | 12 +- include/hllm/configuration/requestManager.hpp | 29 +- include/hllm/configuration/task.hpp | 38 +-- include/hllm/edge/base.hpp | 170 +++++----- include/hllm/edge/input.hpp | 112 ++++--- include/hllm/edge/message.hpp | 66 ++-- include/hllm/edge/output.hpp | 97 +++--- include/hllm/engine.hpp | 91 +++--- include/hllm/messages/base.hpp | 16 +- include/hllm/messages/data.hpp | 36 ++- include/hllm/messages/heartbeat.hpp | 22 +- include/hllm/messages/prompt.hpp | 30 +- include/hllm/messages/replicaReady.hpp | 16 +- include/hllm/prompt.hpp | 37 ++- include/hllm/realTimeAnalysis.hpp | 81 ++--- include/hllm/role.hpp | 86 +++-- include/hllm/roles/partition/base.hpp | 126 ++++---- include/hllm/roles/partition/coordinator.hpp | 301 +++++++++--------- include/hllm/roles/partition/replica.hpp | 217 +++++++------ include/hllm/roles/requestManager.hpp | 165 +++++----- include/hllm/session.hpp | 26 +- include/hllm/task.hpp | 61 ++-- 25 files changed, 1178 insertions(+), 1043 deletions(-) diff --git a/examples/basic/basic.cpp b/examples/basic/basic.cpp index cb74899..576677f 100644 --- a/examples/basic/basic.cpp +++ b/examples/basic/basic.cpp @@ -23,7 +23,7 @@ int main(int argc, char *argv[]) // Creating HWloc topology object hwloc_topology_t hwlocTopologyObject; - // Reserving memory for hwloc + // Reserving memory for hwloc hwloc_topology_init(&hwlocTopologyObject); // Initializing host (CPU) topology manager @@ -51,20 +51,20 @@ int main(int argc, char *argv[]) HiCR::backend::pthreads::Core core(1); // Getting managers - auto instanceManager = HiCR::backend::mpi::InstanceManager::createDefault(&argc, &argv); - auto communicationManager = std::make_shared(); - auto memoryManager = std::make_shared(); - auto workerComputeManager = std::make_shared(); - auto taskComputeManager = std::make_shared(); + auto instanceManager = HiCR::backend::mpi::InstanceManager::createDefault(&argc, &argv); + auto communicationManager = std::make_shared(); + auto memoryManager = std::make_shared(); + auto workerComputeManager = std::make_shared(); + auto taskComputeManager = std::make_shared(); // Creating taskr object nlohmann::json taskrConfig; - taskrConfig["Task Worker Inactivity Time (Ms)"] = 100; // Suspend workers if a certain time of inactivity elapses - taskrConfig["Task Suspend Interval Time (Ms)"] = 100; // Workers suspend for this time before checking back - taskrConfig["Minimum Active Task Workers"] = 1; // Have at least one worker active at all times - taskrConfig["Service Worker Count"] = 1; // Have one dedicated service workers at all times to listen for incoming messages - taskrConfig["Make Task Workers Run Services"] = true; // Workers will check for meta messages in between executions - auto taskr = std::make_unique(taskComputeManager.get(), workerComputeManager.get(), computeResources, taskrConfig); + taskrConfig["Task Worker Inactivity Time (Ms)"] = 100; // Suspend workers if a certain time of inactivity elapses + taskrConfig["Task Suspend Interval Time (Ms)"] = 100; // Workers suspend for this time before checking back + taskrConfig["Minimum Active Task Workers"] = 1; // Have at least one worker active at all times + taskrConfig["Service Worker Count"] = 1; // Have one dedicated service workers at all times to listen for incoming messages + taskrConfig["Make Task Workers Run Services"] = true; // Workers will check for meta messages in between executions + auto taskr = std::make_unique(taskComputeManager.get(), workerComputeManager.get(), computeResources, taskrConfig); // Instantiate RPC Engine auto rpcEngine = std::make_shared(*communicationManager, *instanceManager, *memoryManager, *workerComputeManager, bufferMemorySpace, computeResource); @@ -95,7 +95,7 @@ int main(int argc, char *argv[]) // Parsing request file contents to a JSON object std::ifstream hllmConfigFs(hllmConfigFilePath); - auto hllmConfigJs = nlohmann::json::parse(hllmConfigFs); + auto hllmConfigJs = nlohmann::json::parse(hllmConfigFs); // Parsing config file using hLLM deployment.deserialize(hllmConfigJs); @@ -114,7 +114,7 @@ int main(int argc, char *argv[]) auto instance = *instanceManager->getInstances().begin(); // Assigning instance ids to the partitions - for (auto p : deployment.getPartitions()) + for (auto p : deployment.getPartitions()) { // Getting instance Id that will run this partition (only one) const auto partitionInstanceId = instance->getId(); @@ -129,7 +129,7 @@ int main(int argc, char *argv[]) p->addReplica(replica); } } - + // printf("%s\n", deployment.serialize().dump(2).c_str()); } @@ -138,7 +138,7 @@ int main(int argc, char *argv[]) // Before deploying, we need to indicate what communication and memory managers to assign to each of the edges // This allows for flexibility to choose in which devices to place the payload and coordination buffers - for (const auto& edge : hllm.getDeployment().getEdges()) + for (const auto &edge : hllm.getDeployment().getEdges()) { edge->setPayloadCommunicationManager(communicationManager.get()); edge->setPayloadMemoryManager(memoryManager.get()); @@ -154,19 +154,18 @@ int main(int argc, char *argv[]) // Setting managers for partition-wise control messaging hllm.getDeployment().getControlBuffer().communicationManager = communicationManager.get(); - hllm.getDeployment().getControlBuffer().memoryManager = memoryManager.get(); - hllm.getDeployment().getControlBuffer().memorySpace = bufferMemorySpace; + hllm.getDeployment().getControlBuffer().memoryManager = memoryManager.get(); + hllm.getDeployment().getControlBuffer().memorySpace = bufferMemorySpace; // Declaring local value outside the functions for them to persist, but particular to each replica std::vector cathetusSquaredSummedOutput; cathetusSquaredSummedOutput.resize(_REPLICAS_PER_PARTITION); // Declaring the hLLM tasks for the application - hllm.registerFunction("Listen Request", [&](hLLM::Task *task) - { + hllm.registerFunction("Listen Request", [&](hLLM::Task *task) { // Getting raw request const auto &requestMemSlot = task->getInput("Catheti"); - const auto request = std::string((const char *)requestMemSlot->getPointer()); + const auto request = std::string((const char *)requestMemSlot->getPointer()); // Getting catheti values float cathetusAoutput; @@ -182,11 +181,10 @@ int main(int argc, char *argv[]) memoryManager->deregisterLocalMemorySlot(cathetusBMemorySlot); }); - hllm.registerFunction("Square Cathetus A", [&](hLLM::Task *task) - { + hllm.registerFunction("Square Cathetus A", [&](hLLM::Task *task) { // Getting input - const auto &cathetusMemSlot = task->getInput("Cathetus A"); - const float* cathetusA = (float *)cathetusMemSlot->getPointer(); + const auto &cathetusMemSlot = task->getInput("Cathetus A"); + const float *cathetusA = (float *)cathetusMemSlot->getPointer(); // Squaring cathetus float cathetusASquaredOutput = (*cathetusA) * (*cathetusA); @@ -197,11 +195,10 @@ int main(int argc, char *argv[]) memoryManager->deregisterLocalMemorySlot(cathetusASquaredMemorySlot); }); - hllm.registerFunction("Square Cathetus B", [&](hLLM::Task *task) - { + hllm.registerFunction("Square Cathetus B", [&](hLLM::Task *task) { // Getting input - const auto &cathetusMemSlot = task->getInput("Cathetus B"); - const float* cathetusB = (float *)cathetusMemSlot->getPointer(); + const auto &cathetusMemSlot = task->getInput("Cathetus B"); + const float *cathetusB = (float *)cathetusMemSlot->getPointer(); // Squaring cathetus float cathetusBSquaredOutput = (*cathetusB) * (*cathetusB); @@ -212,13 +209,12 @@ int main(int argc, char *argv[]) memoryManager->deregisterLocalMemorySlot(cathetusBSquaredMemorySlot); }); - hllm.registerFunction("Sum Catheti Squares", [&](hLLM::Task *task) - { + hllm.registerFunction("Sum Catheti Squares", [&](hLLM::Task *task) { // Getting inputs - const auto &cathetusASquaredMemSlot = task->getInput("Cathetus A Squared"); - const float* cathetusAsquared = (float *)cathetusASquaredMemSlot->getPointer(); - const auto &cathetusBSquaredMemSlot = task->getInput("Cathetus B Squared"); - const float* cathetusBsquared = (float *)cathetusBSquaredMemSlot->getPointer(); + const auto &cathetusASquaredMemSlot = task->getInput("Cathetus A Squared"); + const float *cathetusAsquared = (float *)cathetusASquaredMemSlot->getPointer(); + const auto &cathetusBSquaredMemSlot = task->getInput("Cathetus B Squared"); + const float *cathetusBsquared = (float *)cathetusBSquaredMemSlot->getPointer(); // Getting my replica id const auto replicaId = task->getReplicaIdx(); @@ -227,8 +223,7 @@ int main(int argc, char *argv[]) cathetusSquaredSummedOutput[replicaId] = (*cathetusAsquared) + (*cathetusBsquared); }); - hllm.registerFunction("Square Root Sum", [&](hLLM::Task *task) - { + hllm.registerFunction("Square Root Sum", [&](hLLM::Task *task) { // Getting my replica id const auto replicaId = task->getReplicaIdx(); @@ -246,12 +241,12 @@ int main(int argc, char *argv[]) if (isRoot) { // RNG for wait time between prompts - std::default_random_engine promptTimeRandomEngine; - std::uniform_real_distribution promptTimeRandomDistribution(0.0, 1.0); + std::default_random_engine promptTimeRandomEngine; + std::uniform_real_distribution promptTimeRandomDistribution(0.0, 1.0); // RNG for catheti values - std::default_random_engine cathetiRandomEngine; - std::uniform_real_distribution cathetiRandomDistribution(0.1, 10.0); + std::default_random_engine cathetiRandomEngine; + std::uniform_real_distribution cathetiRandomDistribution(0.1, 10.0); // Counter for the finished threads std::atomic finishedPromptThreads = 0; @@ -260,11 +255,10 @@ int main(int argc, char *argv[]) const auto tolerance = 0.0001; for (size_t i = 0; i < _PROMPT_THREAD_COUNT; i++) - promptThreads.push_back(std::make_unique([&, i]() - { + promptThreads.push_back(std::make_unique([&, i]() { // Wait until the hLLM has deployed while (hllm.isDeployed() == false); - + // Now create session auto session = hllm.createSession(); printf("[User %04lu] Created Session: %lu\n", i, session->getSessionId()); @@ -287,19 +281,30 @@ int main(int argc, char *argv[]) // printf("[User] Sent prompt (%lu/%lu): '%s'\n", promptId.first, promptId.second, prompt->getPrompt().c_str()); // Wait until the prompt receives a response - while(prompt->hasResponse() == false); + while (prompt->hasResponse() == false); // Getting response - const float response = *(float*)prompt->getResponse().data(); + const float response = *(float *)prompt->getResponse().data(); // Calculating const float error = std::abs(sqrtf(cathetusA * cathetusA + cathetusB * cathetusB) - response); // Printing response - printf("[User %04lu] Got response: %f for prompt %lu/%lu: '%s'. |Error|: %f (< tolerance: %f)\n", i, response, promptId.first, promptId.second, prompt->getPrompt().c_str(), error, tolerance); + printf("[User %04lu] Got response: %f for prompt %lu/%lu: '%s'. |Error|: %f (< tolerance: %f)\n", + i, + response, + promptId.first, + promptId.second, + prompt->getPrompt().c_str(), + error, + tolerance); // Verifying result - if (error > tolerance) { fprintf(stderr, "Response error is higher than tolerance, aborting...\n"); exit(-1); } + if (error > tolerance) + { + fprintf(stderr, "Response error is higher than tolerance, aborting...\n"); + exit(-1); + } // Waiting a random amount of time before sending the next prompt usleep(100000.0 * promptTimeRandomDistribution(promptTimeRandomEngine)); @@ -318,7 +323,8 @@ int main(int argc, char *argv[]) hllm.deploy(deployment); // // Waiting for prompt thread to finish - if (isRoot) for (auto& thread : promptThreads) thread->join(); + if (isRoot) + for (auto &thread : promptThreads) thread->join(); // Finalize Instance Manager instanceManager->finalize(); diff --git a/include/hllm/configuration/deployment.hpp b/include/hllm/configuration/deployment.hpp index a2b4e77..543a9f0 100644 --- a/include/hllm/configuration/deployment.hpp +++ b/include/hllm/configuration/deployment.hpp @@ -22,7 +22,7 @@ class Deployment final * Every 'interval' milliseconds, the coordinator sends a heartbeat message to its own replicas and vice-versa * If the 'tolerance' interval passes without a heartbeat, then the other side is deemed non-responsive * Recovery or fail tolerance mechanisms can be activated upon this happening - */ + */ struct heartbeat_t { // Whether to enable the heartbeat service or not @@ -42,9 +42,9 @@ class Deployment final * The control buffer is the edge between coordinators and replicas to exchange control messages. * Control messages are not related to the application, but instead to managing deployment aspects * (e.g., load balancing, fail detection, migration) - */ + */ static constexpr size_t _defaultControlBufferCapacity = 256; - static constexpr size_t _defaultControlBufferSize = _defaultControlBufferCapacity * 1024; + static constexpr size_t _defaultControlBufferSize = _defaultControlBufferCapacity * 1024; struct controlBuffer_t { // Capacity (the maximum number of pending tokens in the buffer) @@ -54,65 +54,66 @@ class Deployment final size_t size = _defaultControlBufferSize; // HiCR-specific objects to create the control buffers. These are to be set at runtime - HiCR::CommunicationManager* communicationManager = nullptr; - HiCR::MemoryManager* memoryManager = nullptr; - std::shared_ptr memorySpace = nullptr; + HiCR::CommunicationManager *communicationManager = nullptr; + HiCR::MemoryManager *memoryManager = nullptr; + std::shared_ptr memorySpace = nullptr; }; struct settings_t { - heartbeat_t heartbeat; + heartbeat_t heartbeat; controlBuffer_t controlBuffer; }; - Deployment(const std::string& name) : _name(name) {}; - Deployment(const nlohmann::json& js) { deserialize(js); }; - Deployment() = default; - ~Deployment() = default; + Deployment(const std::string &name) + : _name(name){}; + Deployment(const nlohmann::json &js) { deserialize(js); }; + Deployment() = default; + ~Deployment() = default; - __INLINE__ void setName(const std::string& name) { _name = name; } + __INLINE__ void setName(const std::string &name) { _name = name; } __INLINE__ void addPartition(const std::shared_ptr partition) { _partitions.push_back(partition); } __INLINE__ void addChannel(const std::shared_ptr channel) { _edges.push_back(channel); } [[nodiscard]] __INLINE__ std::string getName() const { return _name; } - [[nodiscard]] __INLINE__ auto& getPartitions() const { return _partitions; } - [[nodiscard]] __INLINE__ auto& getEdges() const { return _edges; } - [[nodiscard]] __INLINE__ auto& getHeartbeat() const { return _settings.heartbeat; } - [[nodiscard]] __INLINE__ auto& getControlBuffer() { return _settings.controlBuffer; } - [[nodiscard]] __INLINE__ auto& getControlBufferConst() const { return _settings.controlBuffer; } - [[nodiscard]] __INLINE__ auto& getRequestManager() const { return _requestManager; } - - [[nodiscard]] __INLINE__ nlohmann::json serialize() const + [[nodiscard]] __INLINE__ auto &getPartitions() const { return _partitions; } + [[nodiscard]] __INLINE__ auto &getEdges() const { return _edges; } + [[nodiscard]] __INLINE__ auto &getHeartbeat() const { return _settings.heartbeat; } + [[nodiscard]] __INLINE__ auto &getControlBuffer() { return _settings.controlBuffer; } + [[nodiscard]] __INLINE__ auto &getControlBufferConst() const { return _settings.controlBuffer; } + [[nodiscard]] __INLINE__ auto &getRequestManager() const { return _requestManager; } + + [[nodiscard]] __INLINE__ nlohmann::json serialize() const { nlohmann::json js; js["Name"] = _name; std::vector partitionsJs; - for (const auto& p : _partitions) partitionsJs.push_back(p->serialize()); + for (const auto &p : _partitions) partitionsJs.push_back(p->serialize()); js["Partitions"] = partitionsJs; std::vector edgesJs; - for (const auto& e : _edges) edgesJs.push_back(e->serialize()); + for (const auto &e : _edges) edgesJs.push_back(e->serialize()); js["Edges"] = edgesJs; js["Request Manager"] = _requestManager->serialize(); ////////////////////// Parsing settings auto settings = std::map(); - + // Heartbeat - auto heartbeat = std::map(); - heartbeat["Enabled"] = _settings.heartbeat.enabled; - heartbeat["Visible"] = _settings.heartbeat.visible; - heartbeat["Interval"] = _settings.heartbeat.interval; + auto heartbeat = std::map(); + heartbeat["Enabled"] = _settings.heartbeat.enabled; + heartbeat["Visible"] = _settings.heartbeat.visible; + heartbeat["Interval"] = _settings.heartbeat.interval; heartbeat["Tolerance"] = _settings.heartbeat.tolerance; - settings["Heartbeat"] = heartbeat; + settings["Heartbeat"] = heartbeat; // Control Buffer - auto controlBuffer = std::map(); - controlBuffer["Capacity"] = _settings.controlBuffer.capacity; - controlBuffer["Size"] = _settings.controlBuffer.size; + auto controlBuffer = std::map(); + controlBuffer["Capacity"] = _settings.controlBuffer.capacity; + controlBuffer["Size"] = _settings.controlBuffer.size; settings["Control Buffer"] = controlBuffer; js["Settings"] = settings; @@ -120,7 +121,7 @@ class Deployment final return js; } - __INLINE__ void deserialize (const nlohmann::json& js) + __INLINE__ void deserialize(const nlohmann::json &js) { // Clearing objects _partitions.clear(); @@ -128,39 +129,39 @@ class Deployment final _name = hicr::json::getString(js, "Name"); - const auto& partitions = hicr::json::getArray(js, "Partitions"); - for (const auto& p : partitions) _partitions.push_back(std::make_shared(p)); + const auto &partitions = hicr::json::getArray(js, "Partitions"); + for (const auto &p : partitions) _partitions.push_back(std::make_shared(p)); - const auto& edges = hicr::json::getArray(js, "Edges"); - for (const auto& e : edges) _edges.push_back(std::make_shared(e)); + const auto &edges = hicr::json::getArray(js, "Edges"); + for (const auto &e : edges) _edges.push_back(std::make_shared(e)); - const auto& requestManagerJs = hicr::json::getObject(js, "Request Manager"); - _requestManager = std::make_shared(requestManagerJs); + const auto &requestManagerJs = hicr::json::getObject(js, "Request Manager"); + _requestManager = std::make_shared(requestManagerJs); // Getting settings nlohmann::json settingsJs = hicr::json::getObject(js, "Settings"); - nlohmann::json heartbeatJs = hicr::json::getObject(settingsJs, "Heartbeat"); - _settings.heartbeat.enabled = hicr::json::getBoolean(heartbeatJs, "Enabled"); - _settings.heartbeat.visible = hicr::json::getBoolean(heartbeatJs, "Visible"); - _settings.heartbeat.interval = hicr::json::getNumber(heartbeatJs, "Interval"); + nlohmann::json heartbeatJs = hicr::json::getObject(settingsJs, "Heartbeat"); + _settings.heartbeat.enabled = hicr::json::getBoolean(heartbeatJs, "Enabled"); + _settings.heartbeat.visible = hicr::json::getBoolean(heartbeatJs, "Visible"); + _settings.heartbeat.interval = hicr::json::getNumber(heartbeatJs, "Interval"); _settings.heartbeat.tolerance = hicr::json::getNumber(heartbeatJs, "Tolerance"); - nlohmann::json controlBufferJs = hicr::json::getObject(settingsJs, "Control Buffer"); + nlohmann::json controlBufferJs = hicr::json::getObject(settingsJs, "Control Buffer"); _settings.controlBuffer.capacity = hicr::json::getNumber(controlBufferJs, "Capacity"); - _settings.controlBuffer.size = hicr::json::getNumber(controlBufferJs, "Size"); + _settings.controlBuffer.size = hicr::json::getNumber(controlBufferJs, "Size"); } - + // Includes all kinds of sanity checks relevant to a deployment __INLINE__ void verify() const { // Getting rqeuest manager input and output edges - const auto& requestManagerInputEdge = _requestManager->getInput(); - const auto& requestManagerOutputEdge = _requestManager->getOutput(); + const auto &requestManagerInputEdge = _requestManager->getInput(); + const auto &requestManagerOutputEdge = _requestManager->getOutput(); // Getting all partition's edges in a set std::set partitionNameSet; - for (const auto& partition : _partitions) partitionNameSet.insert(partition->getName()); + for (const auto &partition : _partitions) partitionNameSet.insert(partition->getName()); // Getting a list of all edges to check the tasks specifying inputs/outputs actually refer to one of these, or user interface std::set edgeNameSet; @@ -168,27 +169,28 @@ class Deployment final // Getting a list of all the inputs and output names to verify they correspond to each other at least once std::set inputSet; std::set outputSet; - + // Storage for those partitions with user interface input and output - std::shared_ptr userInterfaceInputPartition = nullptr; + std::shared_ptr userInterfaceInputPartition = nullptr; std::shared_ptr userInterfaceOutputPartition = nullptr; // Now checking all dependencies belong in the same partition std::set tasksWithDependencies; - for (const auto& partition : _partitions) + for (const auto &partition : _partitions) { // Creating set of task names of this partition std::set _taskNames; - for (const auto& task : partition->getTasks()) _taskNames.insert(task->getFunctionName()); + for (const auto &task : partition->getTasks()) _taskNames.insert(task->getFunctionName()); - for (const auto& task : partition->getTasks()) - for (const auto& dependency : task->getDependencies()) + for (const auto &task : partition->getTasks()) + for (const auto &dependency : task->getDependencies()) { // Check the task doesn't depend on itself if (dependency == task->getFunctionName()) HICR_THROW_LOGIC("Task %s has a dependency on itself\n", task->getFunctionName().c_str()); // Check the dependency has a task that is referenced by this dependency - if (_taskNames.contains(dependency) == false) HICR_THROW_LOGIC("Task %s has a dependency on '%s' which is not defined in this partition\n", task->getFunctionName().c_str(), dependency.c_str()); + if (_taskNames.contains(dependency) == false) + HICR_THROW_LOGIC("Task %s has a dependency on '%s' which is not defined in this partition\n", task->getFunctionName().c_str(), dependency.c_str()); // Adding dependency to tasks with dependencies tasksWithDependencies.insert(dependency); @@ -196,9 +198,9 @@ class Deployment final } // First, the user interface input/outputs are counted as edges for the purposes of this check - for (const auto& edge : _edges) + for (const auto &edge : _edges) { - const auto& edgeName = edge->getName(); + const auto &edgeName = edge->getName(); if (edgeNameSet.contains(edgeName)) HICR_THROW_LOGIC("Deployment specifies repeated edge or user input '%s'\n", edgeName.c_str()); edgeNameSet.insert(edgeName); } @@ -206,14 +208,17 @@ class Deployment final // For each input / output, remember which partition is its consumer / producer std::map consumerPartitionMap; std::map producerPartitionMap; - for (const auto& partition : _partitions) - for (const auto& task : partition->getTasks()) + for (const auto &partition : _partitions) + for (const auto &task : partition->getTasks()) { // Make sure all tasks have at least one input - if (task->getInputs().size() == 0 && task->getDependencies().empty() == true) HICR_THROW_LOGIC("Deployment specifies task in partition '%s' with function name '%s' without any inputs or dependencies\n", partition->getName().c_str(), task->getFunctionName().c_str()); + if (task->getInputs().size() == 0 && task->getDependencies().empty() == true) + HICR_THROW_LOGIC("Deployment specifies task in partition '%s' with function name '%s' without any inputs or dependencies\n", + partition->getName().c_str(), + task->getFunctionName().c_str()); // Check that the edge is not used as input more than once. All edges must be 1-to-1 - for (const auto& input : task->getInputs()) + for (const auto &input : task->getInputs()) { if (inputSet.contains(input)) HICR_THROW_LOGIC("Deployment specifies input '%s' used more than once\n", input.c_str()); if (edgeNameSet.contains(input) == false) HICR_THROW_LOGIC("Deployment specifies task '%s' with an undefined input '%s'\n", task->getFunctionName(), input.c_str()); @@ -222,15 +227,20 @@ class Deployment final // Check the task that contains the user interface input does not receive any other inputs if (input == requestManagerInputEdge) userInterfaceInputPartition = partition; - if (input == requestManagerOutputEdge) HICR_THROW_LOGIC("Deployment specifies task '%s' with user interface output '%s' which is being used as input\n", task->getFunctionName(), input.c_str()); - if (input == requestManagerInputEdge && task->getInputs().size() > 1) HICR_THROW_LOGIC("Deployment specifies task '%s' with user interface input '%s' which is not the only input of that task\n", task->getFunctionName(), input.c_str()); - } + if (input == requestManagerOutputEdge) + HICR_THROW_LOGIC("Deployment specifies task '%s' with user interface output '%s' which is being used as input\n", task->getFunctionName(), input.c_str()); + if (input == requestManagerInputEdge && task->getInputs().size() > 1) + HICR_THROW_LOGIC("Deployment specifies task '%s' with user interface input '%s' which is not the only input of that task\n", task->getFunctionName(), input.c_str()); + } // Make sure all tasks have at least one output - if (task->getOutputs().size() == 0 && tasksWithDependencies.contains(task->getFunctionName()) == false) HICR_THROW_LOGIC("Deployment specifies task in partition '%'' with function name '%s' without any outputs or dependents\n", partition->getName().c_str(), task->getFunctionName().c_str()); + if (task->getOutputs().size() == 0 && tasksWithDependencies.contains(task->getFunctionName()) == false) + HICR_THROW_LOGIC("Deployment specifies task in partition '%'' with function name '%s' without any outputs or dependents\n", + partition->getName().c_str(), + task->getFunctionName().c_str()); // Check that the output is not used as input more than once. All edges must be 1-to-1 - for (const auto& output : task->getOutputs()) + for (const auto &output : task->getOutputs()) { if (outputSet.contains(output)) HICR_THROW_LOGIC("Deployment specifies output '%s' used more than once\n", output.c_str()); if (edgeNameSet.contains(output) == false) HICR_THROW_LOGIC("Deployment specifies task '%s' with an undefined output '%s'\n", task->getFunctionName(), output.c_str()); @@ -239,83 +249,95 @@ class Deployment final // Check the task that contains the user interface input does not receive any other inputs if (output == requestManagerOutputEdge) userInterfaceOutputPartition = partition; - if (output == requestManagerInputEdge) HICR_THROW_LOGIC("Deployment specifies task '%s' with user interface input '%s' which is being used as output\n", task->getFunctionName(), output.c_str()); - if (output == requestManagerOutputEdge && task->getOutputs().size() > 1) HICR_THROW_LOGIC("Deployment specifies task '%s' with user interface output '%s' which is not the only output of task\n", task->getFunctionName(), output.c_str()); - } + if (output == requestManagerInputEdge) + HICR_THROW_LOGIC("Deployment specifies task '%s' with user interface input '%s' which is being used as output\n", task->getFunctionName(), output.c_str()); + if (output == requestManagerOutputEdge && task->getOutputs().size() > 1) + HICR_THROW_LOGIC("Deployment specifies task '%s' with user interface output '%s' which is not the only output of task\n", task->getFunctionName(), output.c_str()); + } } // Check whether all edges have consumer+producer partitions that do exist - for (const auto& edge : _edges) + for (const auto &edge : _edges) { - const auto& edgeName = edge->getName(); + const auto &edgeName = edge->getName(); if (edgeName != requestManagerInputEdge) - if (edgeName != requestManagerOutputEdge) - if (consumerPartitionMap.contains(edgeName) == false || producerPartitionMap.contains(edgeName) == false) - HICR_THROW_LOGIC("Deployment specifies edge '%s' but it is either not used as input or output (or neither)\n", edge->getName().c_str()); + if (edgeName != requestManagerOutputEdge) + if (consumerPartitionMap.contains(edgeName) == false || producerPartitionMap.contains(edgeName) == false) + HICR_THROW_LOGIC("Deployment specifies edge '%s' but it is either not used as input or output (or neither)\n", edge->getName().c_str()); // Getting producer and consumer partitions - const auto& producerPartition = producerPartitionMap[edge->getName()]; - const auto& consumerPartition = consumerPartitionMap[edge->getName()]; + const auto &producerPartition = producerPartitionMap[edge->getName()]; + const auto &consumerPartition = consumerPartitionMap[edge->getName()]; // Checking the edge connects two different partitions - if (producerPartition == consumerPartition) HICR_THROW_LOGIC("Deployment specifies edge '%s' that is both produced and consumed by partition %s\n", edge->getName().c_str(), producerPartition.c_str()); + if (producerPartition == consumerPartition) + HICR_THROW_LOGIC("Deployment specifies edge '%s' that is both produced and consumed by partition %s\n", edge->getName().c_str(), producerPartition.c_str()); - // Setting producer and consumer partitions for the edge - edge->setProducer(producerPartition); - edge->setConsumer(consumerPartition); + // Setting producer and consumer partitions for the edge + edge->setProducer(producerPartition); + edge->setConsumer(consumerPartition); } // Make sure all inputs are also used as outputs, as long as it is not the user interface input - for (const auto& input : inputSet) if (input != requestManagerInputEdge) if (outputSet.contains(input) == false) HICR_THROW_LOGIC("Deployment input '%s' is not associated to any output\n", input.c_str()); - for (const auto& output : outputSet) if (output != requestManagerOutputEdge) if (inputSet.contains(output) == false) HICR_THROW_LOGIC("Deployment output '%s' is not associated to any input\n", output.c_str()); + for (const auto &input : inputSet) + if (input != requestManagerInputEdge) + if (outputSet.contains(input) == false) HICR_THROW_LOGIC("Deployment input '%s' is not associated to any output\n", input.c_str()); + for (const auto &output : outputSet) + if (output != requestManagerOutputEdge) + if (inputSet.contains(output) == false) HICR_THROW_LOGIC("Deployment output '%s' is not associated to any input\n", output.c_str()); // Check the user interface input/output are being used if (userInterfaceInputPartition == nullptr) HICR_THROW_LOGIC("User interface input '%s' is not associated to any partition\n", requestManagerInputEdge.c_str()); if (userInterfaceOutputPartition == nullptr) HICR_THROW_LOGIC("User interface output '%s' is not associated to any partition\n", requestManagerOutputEdge.c_str()); // Make sure the partition which contains the user interface input does not contain any cross-partition inputs - for (const auto& task : userInterfaceInputPartition->getTasks()) - for (const auto& input : task->getInputs()) + for (const auto &task : userInterfaceInputPartition->getTasks()) + for (const auto &input : task->getInputs()) if (input != requestManagerInputEdge) - if (producerPartitionMap.at(input) != userInterfaceInputPartition->getName()) - HICR_THROW_LOGIC("Partition %s consumes the user interface input '%s' but also has other inter-partition inputs (e.g.,: '%s')\n", userInterfaceInputPartition->getName().c_str(), requestManagerInputEdge.c_str(), input.c_str()); - + if (producerPartitionMap.at(input) != userInterfaceInputPartition->getName()) + HICR_THROW_LOGIC("Partition %s consumes the user interface input '%s' but also has other inter-partition inputs (e.g.,: '%s')\n", + userInterfaceInputPartition->getName().c_str(), + requestManagerInputEdge.c_str(), + input.c_str()); + // Make sure the partition which contains the user interface output does not contain any cross-partition outputs - for (const auto& task : userInterfaceOutputPartition->getTasks()) - for (const auto& output : task->getOutputs()) + for (const auto &task : userInterfaceOutputPartition->getTasks()) + for (const auto &output : task->getOutputs()) if (output != requestManagerOutputEdge) if (consumerPartitionMap.at(output) != userInterfaceOutputPartition->getName()) - HICR_THROW_LOGIC("Partition %s produces the user interface output '%s' but also has other inter-partition inputs (e.g.,: '%s')\n", userInterfaceOutputPartition->getName().c_str(), requestManagerOutputEdge.c_str(), output.c_str()); + HICR_THROW_LOGIC("Partition %s produces the user interface output '%s' but also has other inter-partition inputs (e.g.,: '%s')\n", + userInterfaceOutputPartition->getName().c_str(), + requestManagerOutputEdge.c_str(), + output.c_str()); - // Setting producer partition for user interface input to be the same as the consumer - for (const auto& edge : _edges) + // Setting producer partition for user interface input to be the same as the consumer + for (const auto &edge : _edges) { - const auto& edgeName = edge->getName(); + const auto &edgeName = edge->getName(); if (edgeName == requestManagerInputEdge) { - edge->setProducer(userInterfaceInputPartition->getName()); - edge->setConsumer(userInterfaceInputPartition->getName()); - edge->setPromptEdge(true); + edge->setProducer(userInterfaceInputPartition->getName()); + edge->setConsumer(userInterfaceInputPartition->getName()); + edge->setPromptEdge(true); } if (edgeName == requestManagerOutputEdge) { - edge->setProducer(userInterfaceOutputPartition->getName()); - edge->setConsumer(userInterfaceOutputPartition->getName()); - edge->setResultEdge(true); + edge->setProducer(userInterfaceOutputPartition->getName()); + edge->setConsumer(userInterfaceOutputPartition->getName()); + edge->setResultEdge(true); } } } private: - std::string _name; + std::string _name; std::vector> _partitions; - std::vector> _edges; - std::shared_ptr _requestManager; - settings_t _settings; - + std::vector> _edges; + std::shared_ptr _requestManager; + settings_t _settings; }; // class Deployment diff --git a/include/hllm/configuration/partition.hpp b/include/hllm/configuration/partition.hpp index 414cea5..abb3ea5 100644 --- a/include/hllm/configuration/partition.hpp +++ b/include/hllm/configuration/partition.hpp @@ -13,69 +13,71 @@ namespace hLLM::configuration class Partition final { - public: + public: typedef uint64_t partitionIndex_t; - + Partition(const nlohmann::json js) { deserialize(js); }; - Partition(const std::string& name, const HiCR::Instance::instanceId_t instanceId) - : _name(name), _coordinatorInstanceId(instanceId) - {} + Partition(const std::string &name, const HiCR::Instance::instanceId_t instanceId) + : _name(name), + _coordinatorInstanceId(instanceId) + {} ~Partition() = default; - __INLINE__ void setName(const std::string& name) { _name = name; } + __INLINE__ void setName(const std::string &name) { _name = name; } __INLINE__ void setCoordinatorInstanceId(const HiCR::Instance::instanceId_t instanceId) { _coordinatorInstanceId = instanceId; } __INLINE__ void addTask(const std::shared_ptr task) { _tasks.push_back(task); } __INLINE__ void addReplica(const std::shared_ptr replica) { _replicas.push_back(replica); } - [[nodiscard]] __INLINE__ auto getName() const { return _name; } - [[nodiscard]] __INLINE__ auto getCoordinatorInstanceId() const { return _coordinatorInstanceId; } - [[nodiscard]] __INLINE__ auto& getTasks() const { return _tasks; } - [[nodiscard]] __INLINE__ auto& getReplicas() const { return _replicas; } + [[nodiscard]] __INLINE__ auto getName() const { return _name; } + [[nodiscard]] __INLINE__ auto getCoordinatorInstanceId() const { return _coordinatorInstanceId; } + [[nodiscard]] __INLINE__ auto &getTasks() const { return _tasks; } + [[nodiscard]] __INLINE__ auto &getReplicas() const { return _replicas; } - [[nodiscard]] __INLINE__ nlohmann::json serialize() const + [[nodiscard]] __INLINE__ nlohmann::json serialize() const { nlohmann::json js; - js["Name"] = _name; + js["Name"] = _name; js["Coordinator Instance Id"] = _coordinatorInstanceId; std::vector tasksJs; - for (const auto& t : _tasks) tasksJs.push_back(t->serialize()); + for (const auto &t : _tasks) tasksJs.push_back(t->serialize()); js["Tasks"] = tasksJs; std::vector replicasJs; - for (const auto& r : _replicas) replicasJs.push_back(r->serialize()); + for (const auto &r : _replicas) replicasJs.push_back(r->serialize()); js["Replicas"] = replicasJs; return js; } - __INLINE__ void deserialize (const nlohmann::json& js) + __INLINE__ void deserialize(const nlohmann::json &js) { // Clearing objects _tasks.clear(); _replicas.clear(); _name = hicr::json::getString(js, "Name"); - if (js.contains("Coordinator Instance Id")) _coordinatorInstanceId = hicr::json::getNumber(js, "Coordinator Instance Id"); // Optional, as it is determined at runtime - - const auto& tasks = hicr::json::getArray(js, "Tasks"); - for (const auto& t : tasks) _tasks.push_back(std::make_shared(t)); + if (js.contains("Coordinator Instance Id")) + _coordinatorInstanceId = hicr::json::getNumber(js, "Coordinator Instance Id"); // Optional, as it is determined at runtime + + const auto &tasks = hicr::json::getArray(js, "Tasks"); + for (const auto &t : tasks) _tasks.push_back(std::make_shared(t)); // This entry is optional, as it can be decided at runtime if (js.contains("Replicas")) { - const auto& replicas = hicr::json::getArray(js, "Replicas"); - for (const auto& r : replicas) _replicas.push_back(std::make_shared(r)); + const auto &replicas = hicr::json::getArray(js, "Replicas"); + for (const auto &r : replicas) _replicas.push_back(std::make_shared(r)); } } private: - - std::string _name; - HiCR::Instance::instanceId_t _coordinatorInstanceId; - std::vector> _tasks; + + std::string _name; + HiCR::Instance::instanceId_t _coordinatorInstanceId; + std::vector> _tasks; std::vector> _replicas; }; // class Partition diff --git a/include/hllm/configuration/replica.hpp b/include/hllm/configuration/replica.hpp index 7a0f4d4..afa307a 100644 --- a/include/hllm/configuration/replica.hpp +++ b/include/hllm/configuration/replica.hpp @@ -11,18 +11,20 @@ namespace hLLM::configuration class Replica final { public: - + typedef uint64_t replicaIndex_t; Replica(const nlohmann::json js) { deserialize(js); }; - Replica(const HiCR::Instance::instanceId_t instanceId) : _instanceId(instanceId) {} + Replica(const HiCR::Instance::instanceId_t instanceId) + : _instanceId(instanceId) + {} ~Replica() = default; __INLINE__ void setInstanceId(const HiCR::Instance::instanceId_t instanceId) { _instanceId = instanceId; } [[nodiscard]] __INLINE__ auto getInstanceId() const { return _instanceId; } - [[nodiscard]] __INLINE__ nlohmann::json serialize() const + [[nodiscard]] __INLINE__ nlohmann::json serialize() const { nlohmann::json js; @@ -31,14 +33,14 @@ class Replica final return js; } - __INLINE__ void deserialize (const nlohmann::json& js) + __INLINE__ void deserialize(const nlohmann::json &js) { if (js.contains("Instance Id")) _instanceId = hicr::json::getNumber(js, "Instance Id"); // Optional, as it is determined at runtime } private: - HiCR::Instance::instanceId_t _instanceId; + HiCR::Instance::instanceId_t _instanceId; }; // class Replica diff --git a/include/hllm/configuration/requestManager.hpp b/include/hllm/configuration/requestManager.hpp index 46df81f..3926386 100644 --- a/include/hllm/configuration/requestManager.hpp +++ b/include/hllm/configuration/requestManager.hpp @@ -11,42 +11,45 @@ namespace hLLM::configuration class RequestManager final { public: - + RequestManager(const nlohmann::json js) { deserialize(js); }; - RequestManager(const std::string& input, const std::string& output) : _input(input), _output(output) {} + RequestManager(const std::string &input, const std::string &output) + : _input(input), + _output(output) + {} ~RequestManager() = default; - __INLINE__ void setInput(const std::string& input) { _input = input; } - __INLINE__ void setOutput(const std::string& output) { _output = output; } + __INLINE__ void setInput(const std::string &input) { _input = input; } + __INLINE__ void setOutput(const std::string &output) { _output = output; } __INLINE__ void setInstanceId(const HiCR::Instance::instanceId_t instanceId) { _instanceId = instanceId; } [[nodiscard]] __INLINE__ auto getInput() const { return _input; } [[nodiscard]] __INLINE__ auto getOutput() const { return _output; } [[nodiscard]] __INLINE__ auto getInstanceId() const { return _instanceId; } - [[nodiscard]] __INLINE__ nlohmann::json serialize() const + [[nodiscard]] __INLINE__ nlohmann::json serialize() const { nlohmann::json js; - js["Input"] = _input; - js["Output"] = _output; + js["Input"] = _input; + js["Output"] = _output; js["Instance Id"] = _instanceId; return js; } - __INLINE__ void deserialize (const nlohmann::json& js) + __INLINE__ void deserialize(const nlohmann::json &js) { - _input = hicr::json::getString(js, "Input"); - _output = hicr::json::getString(js, "Output"); + _input = hicr::json::getString(js, "Input"); + _output = hicr::json::getString(js, "Output"); if (js.contains("Instance Id")) _instanceId = hicr::json::getNumber(js, "Instance Id"); // Optional, as it is determined at runtime } private: - std::string _input; - std::string _output; - HiCR::Instance::instanceId_t _instanceId; + std::string _input; + std::string _output; + HiCR::Instance::instanceId_t _instanceId; }; // class RequestManager diff --git a/include/hllm/configuration/task.hpp b/include/hllm/configuration/task.hpp index 839ce75..33224b4 100644 --- a/include/hllm/configuration/task.hpp +++ b/include/hllm/configuration/task.hpp @@ -12,45 +12,45 @@ class Task final { public: - Task(const nlohmann::json& js) { deserialize(js); }; - Task(const std::string& functionName) - : _functionName(functionName) + Task(const nlohmann::json &js) { deserialize(js); }; + Task(const std::string &functionName) + : _functionName(functionName) {} ~Task() = default; - __INLINE__ void setFunctionName(const std::string& functionName) { _functionName = functionName; } - __INLINE__ void addInput(const std::string& input) { _inputs.push_back(input); } - __INLINE__ void addOutput(const std::string& output) { _outputs.push_back(output); } - __INLINE__ void addDependency(const std::string& functionName) { _dependencies.push_back(functionName); } + __INLINE__ void setFunctionName(const std::string &functionName) { _functionName = functionName; } + __INLINE__ void addInput(const std::string &input) { _inputs.push_back(input); } + __INLINE__ void addOutput(const std::string &output) { _outputs.push_back(output); } + __INLINE__ void addDependency(const std::string &functionName) { _dependencies.push_back(functionName); } - [[nodiscard]] __INLINE__ auto getFunctionName() const { return _functionName; } - [[nodiscard]] __INLINE__ auto& getInputs() const { return _inputs; } - [[nodiscard]] __INLINE__ auto& getOutputs() const { return _outputs; } - [[nodiscard]] __INLINE__ auto& getDependencies() const { return _dependencies; } + [[nodiscard]] __INLINE__ auto getFunctionName() const { return _functionName; } + [[nodiscard]] __INLINE__ auto &getInputs() const { return _inputs; } + [[nodiscard]] __INLINE__ auto &getOutputs() const { return _outputs; } + [[nodiscard]] __INLINE__ auto &getDependencies() const { return _dependencies; } - [[nodiscard]] __INLINE__ nlohmann::json serialize() const + [[nodiscard]] __INLINE__ nlohmann::json serialize() const { nlohmann::json js; js["Function Name"] = _functionName; - js["Inputs"] = _inputs; - js["Outputs"] = _outputs; - js["Dependencies"] = _dependencies; + js["Inputs"] = _inputs; + js["Outputs"] = _outputs; + js["Dependencies"] = _dependencies; return js; } - __INLINE__ void deserialize (const nlohmann::json& js) + __INLINE__ void deserialize(const nlohmann::json &js) { _functionName = hicr::json::getString(js, "Function Name"); - _inputs = hicr::json::getArray(js, "Inputs"); - _outputs = hicr::json::getArray(js, "Outputs"); + _inputs = hicr::json::getArray(js, "Inputs"); + _outputs = hicr::json::getArray(js, "Outputs"); _dependencies = hicr::json::getArray(js, "Dependencies"); } private: - std::string _functionName; + std::string _functionName; std::vector _inputs; std::vector _outputs; std::vector _dependencies; diff --git a/include/hllm/edge/base.hpp b/include/hllm/edge/base.hpp index d7d6fa2..f6d6b53 100644 --- a/include/hllm/edge/base.hpp +++ b/include/hllm/edge/base.hpp @@ -16,9 +16,9 @@ namespace hLLM::edge typedef uint8_t edgeTypeDatatype_t; enum edgeType_t : edgeTypeDatatype_t { - coordinatorToCoordinator = 0, - coordinatorToReplica = 1, - replicaToCoordinator = 2, + coordinatorToCoordinator = 0, + coordinatorToReplica = 1, + replicaToCoordinator = 2, coordinatorToRequestManager = 3, requestManagerToCoordinator = 4 }; @@ -26,66 +26,73 @@ enum edgeType_t : edgeTypeDatatype_t struct memorySlotExchangeInfo_t { /// Pointer to the communication manager required to exchange this memory slot - HiCR::CommunicationManager* communicationManager; + HiCR::CommunicationManager *communicationManager; /// Global key to use for the exchange HiCR::GlobalMemorySlot::globalKey_t globalKey; - + /// Local memory slot to exchange std::shared_ptr memorySlot; }; -class Base +class Base { public: - static constexpr size_t maxEdgeIndexBits = 24; - static constexpr size_t maxProducerPartitionIndexBits = 12; - static constexpr size_t maxConsumerPartitionIndexBits = 12; - static constexpr size_t maxReplicaIndexBits = 9; - static constexpr size_t maxEdgeTypeBits = 3; - static constexpr size_t maxChannelSpecificKeyBits = 4; - static constexpr configuration::Edge::edgeIndex_t maxEdgeIndex = 1ul << maxEdgeIndexBits; - static constexpr configuration::Partition::partitionIndex_t maxProducerPartitionIndex = 1ul << maxProducerPartitionIndexBits; - static constexpr configuration::Partition::partitionIndex_t maxConsumerPartitionIndex = 1ul << maxConsumerPartitionIndexBits; - static constexpr configuration::Replica::replicaIndex_t maxReplicaIndex = 1ul << maxReplicaIndexBits; - static constexpr edgeTypeDatatype_t maxEdgeType = 1u << maxEdgeTypeBits; - static constexpr HiCR::GlobalMemorySlot::globalKey_t maxChannelSpecificKey = 1ul << maxChannelSpecificKeyBits; + static constexpr size_t maxEdgeIndexBits = 24; + static constexpr size_t maxProducerPartitionIndexBits = 12; + static constexpr size_t maxConsumerPartitionIndexBits = 12; + static constexpr size_t maxReplicaIndexBits = 9; + static constexpr size_t maxEdgeTypeBits = 3; + static constexpr size_t maxChannelSpecificKeyBits = 4; + static constexpr configuration::Edge::edgeIndex_t maxEdgeIndex = 1ul << maxEdgeIndexBits; + static constexpr configuration::Partition::partitionIndex_t maxProducerPartitionIndex = 1ul << maxProducerPartitionIndexBits; + static constexpr configuration::Partition::partitionIndex_t maxConsumerPartitionIndex = 1ul << maxConsumerPartitionIndexBits; + static constexpr configuration::Replica::replicaIndex_t maxReplicaIndex = 1ul << maxReplicaIndexBits; + static constexpr edgeTypeDatatype_t maxEdgeType = 1u << maxEdgeTypeBits; + static constexpr HiCR::GlobalMemorySlot::globalKey_t maxChannelSpecificKey = 1ul << maxChannelSpecificKeyBits; // When it comes to communication between coordinators, the replica index is indicated as the maximum value possible static constexpr configuration::Replica::replicaIndex_t coordinatorReplicaIndex = maxReplicaIndex - 1; - + // When it comes to control messages, the edge index is indicated as the maximum value possible static constexpr configuration::Edge::edgeIndex_t controlEdgeIndex = maxEdgeIndex - 1; - - Base(const configuration::Edge edgeConfig, - const edgeType_t edgeType, - const configuration::Edge::edgeIndex_t edgeIndex, + + Base(const configuration::Edge edgeConfig, + const edgeType_t edgeType, + const configuration::Edge::edgeIndex_t edgeIndex, const configuration::Partition::partitionIndex_t producerPartitionIndex, const configuration::Partition::partitionIndex_t consumerPartitionIndex, - const configuration::Replica::replicaIndex_t replicaIndex) : - _edgeConfig(edgeConfig), - _edgeType(edgeType), - _edgeIndex(edgeIndex), - _producerPartitionIndex(producerPartitionIndex), - _consumerPartitionIndex(consumerPartitionIndex), - _replicaIndex(replicaIndex) + const configuration::Replica::replicaIndex_t replicaIndex) + : _edgeConfig(edgeConfig), + _edgeType(edgeType), + _edgeIndex(edgeIndex), + _producerPartitionIndex(producerPartitionIndex), + _consumerPartitionIndex(consumerPartitionIndex), + _replicaIndex(replicaIndex) { // Verifying all the required HiCR object have been passed - if (_edgeConfig.getPayloadCommunicationManager () == nullptr) HICR_THROW_LOGIC("Required HiCR object 'PayloadCommunicationManager' not provided at deployment time for edge '%s'", _edgeConfig.getName().c_str()); - if (_edgeConfig.getPayloadMemoryManager () == nullptr) HICR_THROW_LOGIC("Required HiCR object 'PayloadMemoryManager' not provided at deployment time for edge '%s'", _edgeConfig.getName().c_str()); - if (_edgeConfig.getPayloadMemorySpace () == nullptr) HICR_THROW_LOGIC("Required HiCR object 'PayloadMemorySpace' not provided at deployment time for edge '%s'", _edgeConfig.getName().c_str()); - if (_edgeConfig.getCoordinationCommunicationManager() == nullptr) HICR_THROW_LOGIC("Required HiCR object 'CoordinationCommunicationManager' not provided at deployment time for edge '%s'", _edgeConfig.getName().c_str()); - if (_edgeConfig.getCoordinationMemoryManager () == nullptr) HICR_THROW_LOGIC("Required HiCR object 'CoordinationMemoryManager not provided at deployment time for edge '%s'", _edgeConfig.getName().c_str()); - if (_edgeConfig.getCoordinationMemorySpace () == nullptr) HICR_THROW_LOGIC("Required HiCR object 'CoordinationMemorySpace' not provided at deployment time for edge '%s'", _edgeConfig.getName().c_str()); + if (_edgeConfig.getPayloadCommunicationManager() == nullptr) + HICR_THROW_LOGIC("Required HiCR object 'PayloadCommunicationManager' not provided at deployment time for edge '%s'", _edgeConfig.getName().c_str()); + if (_edgeConfig.getPayloadMemoryManager() == nullptr) + HICR_THROW_LOGIC("Required HiCR object 'PayloadMemoryManager' not provided at deployment time for edge '%s'", _edgeConfig.getName().c_str()); + if (_edgeConfig.getPayloadMemorySpace() == nullptr) + HICR_THROW_LOGIC("Required HiCR object 'PayloadMemorySpace' not provided at deployment time for edge '%s'", _edgeConfig.getName().c_str()); + if (_edgeConfig.getCoordinationCommunicationManager() == nullptr) + HICR_THROW_LOGIC("Required HiCR object 'CoordinationCommunicationManager' not provided at deployment time for edge '%s'", _edgeConfig.getName().c_str()); + if (_edgeConfig.getCoordinationMemoryManager() == nullptr) + HICR_THROW_LOGIC("Required HiCR object 'CoordinationMemoryManager not provided at deployment time for edge '%s'", _edgeConfig.getName().c_str()); + if (_edgeConfig.getCoordinationMemorySpace() == nullptr) + HICR_THROW_LOGIC("Required HiCR object 'CoordinationMemorySpace' not provided at deployment time for edge '%s'", _edgeConfig.getName().c_str()); // Reserving memory for the local coordination buffers const auto coordinationBufferSize = HiCR::channel::Base::getCoordinationBufferSize(); - - _dataChannelLocalCoordinationBufferForSizes = _edgeConfig.getCoordinationMemoryManager()->allocateLocalMemorySlot(_edgeConfig.getCoordinationMemorySpace(), coordinationBufferSize); - _dataChannelLocalCoordinationBufferForPayloads = _edgeConfig.getCoordinationMemoryManager()->allocateLocalMemorySlot(_edgeConfig.getCoordinationMemorySpace(), coordinationBufferSize); - _metadataChannelLocalCoordinationBuffer = _edgeConfig.getCoordinationMemoryManager()->allocateLocalMemorySlot(_edgeConfig.getCoordinationMemorySpace(), coordinationBufferSize); + _dataChannelLocalCoordinationBufferForSizes = + _edgeConfig.getCoordinationMemoryManager()->allocateLocalMemorySlot(_edgeConfig.getCoordinationMemorySpace(), coordinationBufferSize); + _dataChannelLocalCoordinationBufferForPayloads = + _edgeConfig.getCoordinationMemoryManager()->allocateLocalMemorySlot(_edgeConfig.getCoordinationMemorySpace(), coordinationBufferSize); + _metadataChannelLocalCoordinationBuffer = _edgeConfig.getCoordinationMemoryManager()->allocateLocalMemorySlot(_edgeConfig.getCoordinationMemorySpace(), coordinationBufferSize); HiCR::channel::Base::initializeCoordinationBuffer(_dataChannelLocalCoordinationBufferForSizes); HiCR::channel::Base::initializeCoordinationBuffer(_dataChannelLocalCoordinationBufferForPayloads); HiCR::channel::Base::initializeCoordinationBuffer(_metadataChannelLocalCoordinationBuffer); @@ -99,29 +106,38 @@ class Base _edgeConfig.getCoordinationMemoryManager()->freeLocalMemorySlot(_metadataChannelLocalCoordinationBuffer); } - virtual void getMemorySlotsToExchange(std::vector& memorySlots) const = 0; - + virtual void getMemorySlotsToExchange(std::vector &memorySlots) const = 0; + __INLINE__ auto getProducerPartitionIndex() const { return _producerPartitionIndex; } __INLINE__ auto getConsumerPartitionIndex() const { return _consumerPartitionIndex; } - __INLINE__ auto getReplicaIndex() const { return _replicaIndex; } + __INLINE__ auto getReplicaIndex() const { return _replicaIndex; } __INLINE__ auto getEdgeIndex() const { return _edgeIndex; } __INLINE__ auto getEdgeConfig() const { return _edgeConfig; } - + // Function to initialize the channels. It must be called only all the memory slots have been exchanged __INLINE__ void initialize(const HiCR::GlobalMemorySlot::tag_t tag) { ///// Data Channel common (producer and consumer) global memory slots - _dataChannelConsumerSizesBuffer = _edgeConfig.getCoordinationCommunicationManager()->getGlobalMemorySlot(tag, encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _dataChannelConsumerSizesBufferKey)); - _dataChannelConsumerPayloadBuffer = _edgeConfig.getPayloadCommunicationManager()->getGlobalMemorySlot( tag, encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _dataChannelConsumerPayloadBufferKey)); - _dataChannelConsumerCoordinationBufferForSizes = _edgeConfig.getCoordinationCommunicationManager()->getGlobalMemorySlot(tag, encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _dataChannelConsumerCoordinationBufferforSizesKey)); - _dataChannelConsumerCoordinationBufferForPayloads = _edgeConfig.getCoordinationCommunicationManager()->getGlobalMemorySlot(tag, encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _dataChannelConsumerCoordinationBufferforPayloadKey)); - _dataChannelProducerCoordinationBufferForSizes = _edgeConfig.getCoordinationCommunicationManager()->getGlobalMemorySlot(tag, encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _dataChannelProducerCoordinationBufferforSizesKey)); - _dataChannelProducerCoordinationBufferForPayloads = _edgeConfig.getCoordinationCommunicationManager()->getGlobalMemorySlot(tag, encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _dataChannelProducerCoordinationBufferforPayloadKey)); + _dataChannelConsumerSizesBuffer = _edgeConfig.getCoordinationCommunicationManager()->getGlobalMemorySlot( + tag, encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _dataChannelConsumerSizesBufferKey)); + _dataChannelConsumerPayloadBuffer = _edgeConfig.getPayloadCommunicationManager()->getGlobalMemorySlot( + tag, encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _dataChannelConsumerPayloadBufferKey)); + _dataChannelConsumerCoordinationBufferForSizes = _edgeConfig.getCoordinationCommunicationManager()->getGlobalMemorySlot( + tag, encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _dataChannelConsumerCoordinationBufferforSizesKey)); + _dataChannelConsumerCoordinationBufferForPayloads = _edgeConfig.getCoordinationCommunicationManager()->getGlobalMemorySlot( + tag, encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _dataChannelConsumerCoordinationBufferforPayloadKey)); + _dataChannelProducerCoordinationBufferForSizes = _edgeConfig.getCoordinationCommunicationManager()->getGlobalMemorySlot( + tag, encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _dataChannelProducerCoordinationBufferforSizesKey)); + _dataChannelProducerCoordinationBufferForPayloads = _edgeConfig.getCoordinationCommunicationManager()->getGlobalMemorySlot( + tag, encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _dataChannelProducerCoordinationBufferforPayloadKey)); ///// Metadata Channel common (producer and consumer) global memory slots - _metadataChannelConsumerPayloadBuffer = _edgeConfig.getCoordinationCommunicationManager()->getGlobalMemorySlot(tag, encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _metadataChannelConsumerPayloadBufferKey)); - _metadataChannelConsumerCoordinationBuffer = _edgeConfig.getCoordinationCommunicationManager()->getGlobalMemorySlot(tag, encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _metadataChannelConsumerCoordinationBufferKey)); - _metadataChannelProducerCoordinationBuffer = _edgeConfig.getCoordinationCommunicationManager()->getGlobalMemorySlot(tag, encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _metadataChannelProducerCoordinationBufferKey)); + _metadataChannelConsumerPayloadBuffer = _edgeConfig.getCoordinationCommunicationManager()->getGlobalMemorySlot( + tag, encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _metadataChannelConsumerPayloadBufferKey)); + _metadataChannelConsumerCoordinationBuffer = _edgeConfig.getCoordinationCommunicationManager()->getGlobalMemorySlot( + tag, encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _metadataChannelConsumerCoordinationBufferKey)); + _metadataChannelProducerCoordinationBuffer = _edgeConfig.getCoordinationCommunicationManager()->getGlobalMemorySlot( + tag, encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _metadataChannelProducerCoordinationBufferKey)); // Creating channels now createChannels(); @@ -134,13 +150,12 @@ class Base virtual void createChannels() = 0; - __INLINE__ static HiCR::GlobalMemorySlot::globalKey_t encodeGlobalKey( - const configuration::Edge::edgeIndex_t edgeIndex, - const configuration::Partition::partitionIndex_t producerPartitionIndex, - const configuration::Partition::partitionIndex_t consumerPartitionIndex, - const configuration::Replica::replicaIndex_t replicaIndex, - const edgeType_t edgeType, - const HiCR::GlobalMemorySlot::globalKey_t channelKey) + __INLINE__ static HiCR::GlobalMemorySlot::globalKey_t encodeGlobalKey(const configuration::Edge::edgeIndex_t edgeIndex, + const configuration::Partition::partitionIndex_t producerPartitionIndex, + const configuration::Partition::partitionIndex_t consumerPartitionIndex, + const configuration::Replica::replicaIndex_t replicaIndex, + const edgeType_t edgeType, + const HiCR::GlobalMemorySlot::globalKey_t channelKey) { // Encoding reserves: // + 24 bits for edgeIndex (max: 16777216) @@ -152,8 +167,10 @@ class Base // Sanity checks if (edgeIndex >= maxEdgeIndex) HICR_THROW_LOGIC("Base index %lu exceeds maximum: %lu\n", edgeIndex, maxEdgeIndex); - if (producerPartitionIndex >= maxProducerPartitionIndex) HICR_THROW_LOGIC("Producer partition index %lu exceeds maximum: %lu\n", producerPartitionIndex, maxProducerPartitionIndex); - if (consumerPartitionIndex >= maxConsumerPartitionIndex) HICR_THROW_LOGIC("Producer partition index %lu exceeds maximum: %lu\n", consumerPartitionIndex, maxConsumerPartitionIndex); + if (producerPartitionIndex >= maxProducerPartitionIndex) + HICR_THROW_LOGIC("Producer partition index %lu exceeds maximum: %lu\n", producerPartitionIndex, maxProducerPartitionIndex); + if (consumerPartitionIndex >= maxConsumerPartitionIndex) + HICR_THROW_LOGIC("Producer partition index %lu exceeds maximum: %lu\n", consumerPartitionIndex, maxConsumerPartitionIndex); if (replicaIndex >= maxReplicaIndex) HICR_THROW_LOGIC("Replica index %lu exceeds maximum: %lu\n", replicaIndex, maxReplicaIndex); if (edgeType >= maxEdgeType) HICR_THROW_LOGIC("Edge type value %lu exceeds maximum: %lu (this must be a bug in hLLM)\n", edgeType, maxEdgeType); if (channelKey >= maxChannelSpecificKey) HICR_THROW_LOGIC("Channel-specific key %lu exceeds maximum: %lu\n", channelKey, maxChannelSpecificKey); @@ -163,13 +180,10 @@ class Base // Creating global key const auto globalKey = HiCR::GlobalMemorySlot::globalKey_t( - edgeIndex << (maxProducerPartitionIndexBits + maxConsumerPartitionIndexBits + maxReplicaIndexBits + maxChannelSpecificKeyBits + maxEdgeTypeBits + initialBit) | - producerPartitionIndex << (maxConsumerPartitionIndexBits + maxReplicaIndexBits + maxChannelSpecificKeyBits + maxEdgeTypeBits + initialBit) | - consumerPartitionIndex << (maxReplicaIndexBits + maxChannelSpecificKeyBits + maxEdgeTypeBits + initialBit) | - replicaIndex << (maxChannelSpecificKeyBits + maxEdgeTypeBits + initialBit) | - edgeType << (maxChannelSpecificKeyBits + initialBit) | - channelKey << (initialBit) - ); + edgeIndex << (maxProducerPartitionIndexBits + maxConsumerPartitionIndexBits + maxReplicaIndexBits + maxChannelSpecificKeyBits + maxEdgeTypeBits + initialBit) | + producerPartitionIndex << (maxConsumerPartitionIndexBits + maxReplicaIndexBits + maxChannelSpecificKeyBits + maxEdgeTypeBits + initialBit) | + consumerPartitionIndex << (maxReplicaIndexBits + maxChannelSpecificKeyBits + maxEdgeTypeBits + initialBit) | + replicaIndex << (maxChannelSpecificKeyBits + maxEdgeTypeBits + initialBit) | edgeType << (maxChannelSpecificKeyBits + initialBit) | channelKey << (initialBit)); // printf("Key: %lu = f(%lu, %lu, %u, %lu)\n", globalKey, edgeIndex, replicaIndex, edgeType, channelKey); @@ -179,15 +193,15 @@ class Base // Assigning keys to the global slots to exchange between consumer and producer sides of this edge // For the data channel - static constexpr HiCR::GlobalMemorySlot::globalKey_t _dataChannelConsumerSizesBufferKey = 0; - static constexpr HiCR::GlobalMemorySlot::globalKey_t _dataChannelConsumerPayloadBufferKey = 1; - static constexpr HiCR::GlobalMemorySlot::globalKey_t _dataChannelConsumerCoordinationBufferforSizesKey = 2; + static constexpr HiCR::GlobalMemorySlot::globalKey_t _dataChannelConsumerSizesBufferKey = 0; + static constexpr HiCR::GlobalMemorySlot::globalKey_t _dataChannelConsumerPayloadBufferKey = 1; + static constexpr HiCR::GlobalMemorySlot::globalKey_t _dataChannelConsumerCoordinationBufferforSizesKey = 2; static constexpr HiCR::GlobalMemorySlot::globalKey_t _dataChannelConsumerCoordinationBufferforPayloadKey = 3; - static constexpr HiCR::GlobalMemorySlot::globalKey_t _dataChannelProducerCoordinationBufferforSizesKey = 4; + static constexpr HiCR::GlobalMemorySlot::globalKey_t _dataChannelProducerCoordinationBufferforSizesKey = 4; static constexpr HiCR::GlobalMemorySlot::globalKey_t _dataChannelProducerCoordinationBufferforPayloadKey = 5; // For the metadata channel - static constexpr HiCR::GlobalMemorySlot::globalKey_t _metadataChannelConsumerPayloadBufferKey = 6; + static constexpr HiCR::GlobalMemorySlot::globalKey_t _metadataChannelConsumerPayloadBufferKey = 6; static constexpr HiCR::GlobalMemorySlot::globalKey_t _metadataChannelConsumerCoordinationBufferKey = 7; static constexpr HiCR::GlobalMemorySlot::globalKey_t _metadataChannelProducerCoordinationBufferKey = 8; @@ -216,12 +230,12 @@ class Base std::shared_ptr _metadataChannelProducerCoordinationBuffer; // Edge configuration and identification variables - const configuration::Edge _edgeConfig; - const edgeType_t _edgeType; - const configuration::Edge::edgeIndex_t _edgeIndex; + const configuration::Edge _edgeConfig; + const edgeType_t _edgeType; + const configuration::Edge::edgeIndex_t _edgeIndex; const configuration::Partition::partitionIndex_t _producerPartitionIndex; const configuration::Partition::partitionIndex_t _consumerPartitionIndex; - const configuration::Replica::replicaIndex_t _replicaIndex; + const configuration::Replica::replicaIndex_t _replicaIndex; // Mutex to prevent concurrent access to the thread-unsafe channels std::mutex _lock; diff --git a/include/hllm/edge/input.hpp b/include/hllm/edge/input.hpp index 7c128f9..5ea8e0d 100644 --- a/include/hllm/edge/input.hpp +++ b/include/hllm/edge/input.hpp @@ -12,22 +12,22 @@ class Input final : public Base { public: - Input(const configuration::Edge edgeConfig, - const edgeType_t edgeType, - const configuration::Edge::edgeIndex_t edgeIndex, + Input(const configuration::Edge edgeConfig, + const edgeType_t edgeType, + const configuration::Edge::edgeIndex_t edgeIndex, const configuration::Partition::partitionIndex_t producerPartitionIndex, const configuration::Partition::partitionIndex_t consumerPartitionIndex, - const configuration::Replica::replicaIndex_t replicaIndex) : - Base(edgeConfig, edgeType, edgeIndex, producerPartitionIndex, consumerPartitionIndex, replicaIndex) + const configuration::Replica::replicaIndex_t replicaIndex) + : Base(edgeConfig, edgeType, edgeIndex, producerPartitionIndex, consumerPartitionIndex, replicaIndex) { ///// Allocating additional local buffers required for the consumer data channel // Allocating sizes buffer as a local memory slot - auto sizesBufferSize = HiCR::channel::variableSize::Base::getTokenBufferSize(sizeof(size_t), _edgeConfig.getBufferCapacity()); - _dataChannelSizesBuffer = _edgeConfig.getCoordinationMemoryManager()->allocateLocalMemorySlot(_edgeConfig.getCoordinationMemorySpace(), sizesBufferSize); + auto sizesBufferSize = HiCR::channel::variableSize::Base::getTokenBufferSize(sizeof(size_t), _edgeConfig.getBufferCapacity()); + _dataChannelSizesBuffer = _edgeConfig.getCoordinationMemoryManager()->allocateLocalMemorySlot(_edgeConfig.getCoordinationMemorySpace(), sizesBufferSize); // Allocating payload buffer as a local memory slot - auto payloadBufferSize = HiCR::channel::variableSize::SPSC::Consumer::getPayloadBufferSize(_edgeConfig.getBufferSize()); + auto payloadBufferSize = HiCR::channel::variableSize::SPSC::Consumer::getPayloadBufferSize(_edgeConfig.getBufferSize()); _dataChannelPayloadBuffer = _edgeConfig.getPayloadMemoryManager()->allocateLocalMemorySlot(_edgeConfig.getPayloadMemorySpace(), payloadBufferSize); ///// Allocating additional local buffers required for the consumer medata channel @@ -47,7 +47,7 @@ class Input final : public Base _edgeConfig.getCoordinationMemoryManager()->freeLocalMemorySlot(_metadataChannelPayloadBuffer); } - __INLINE__ void getMemorySlotsToExchange(std::vector& memorySlots) const override + __INLINE__ void getMemorySlotsToExchange(std::vector &memorySlots) const override { // printf("Exchanging %lu\n", encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _dataChannelConsumerCoordinationBufferforSizesKey) ); // printf("Exchanging %lu\n", encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _dataChannelConsumerCoordinationBufferforPayloadKey) ); @@ -57,19 +57,37 @@ class Input final : public Base // printf("Exchanging %lu\n", encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _metadataChannelConsumerPayloadBufferKey)); // Getting key / memory slot pairs for the data channel - memorySlots.push_back( memorySlotExchangeInfo_t { .communicationManager = _edgeConfig.getCoordinationCommunicationManager(), .globalKey = encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _dataChannelConsumerCoordinationBufferforSizesKey), .memorySlot = _dataChannelLocalCoordinationBufferForSizes } ); - memorySlots.push_back( memorySlotExchangeInfo_t { .communicationManager = _edgeConfig.getCoordinationCommunicationManager(), .globalKey = encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _dataChannelConsumerCoordinationBufferforPayloadKey), .memorySlot = _dataChannelLocalCoordinationBufferForPayloads } ); - memorySlots.push_back( memorySlotExchangeInfo_t { .communicationManager = _edgeConfig.getCoordinationCommunicationManager(), .globalKey = encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _dataChannelConsumerSizesBufferKey), .memorySlot = _dataChannelSizesBuffer } ); - memorySlots.push_back( memorySlotExchangeInfo_t { .communicationManager = _edgeConfig.getPayloadCommunicationManager(), .globalKey = encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _dataChannelConsumerPayloadBufferKey), .memorySlot = _dataChannelPayloadBuffer } ); + memorySlots.push_back(memorySlotExchangeInfo_t{ + .communicationManager = _edgeConfig.getCoordinationCommunicationManager(), + .globalKey = encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _dataChannelConsumerCoordinationBufferforSizesKey), + .memorySlot = _dataChannelLocalCoordinationBufferForSizes}); + memorySlots.push_back(memorySlotExchangeInfo_t{ + .communicationManager = _edgeConfig.getCoordinationCommunicationManager(), + .globalKey = encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _dataChannelConsumerCoordinationBufferforPayloadKey), + .memorySlot = _dataChannelLocalCoordinationBufferForPayloads}); + memorySlots.push_back(memorySlotExchangeInfo_t{ + .communicationManager = _edgeConfig.getCoordinationCommunicationManager(), + .globalKey = encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _dataChannelConsumerSizesBufferKey), + .memorySlot = _dataChannelSizesBuffer}); + memorySlots.push_back(memorySlotExchangeInfo_t{ + .communicationManager = _edgeConfig.getPayloadCommunicationManager(), + .globalKey = encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _dataChannelConsumerPayloadBufferKey), + .memorySlot = _dataChannelPayloadBuffer}); // Getting key / memory slot pairs for the meta data channel - memorySlots.push_back( memorySlotExchangeInfo_t { .communicationManager = _edgeConfig.getCoordinationCommunicationManager(), .globalKey = encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _metadataChannelConsumerCoordinationBufferKey), .memorySlot = _metadataChannelLocalCoordinationBuffer } ); - memorySlots.push_back( memorySlotExchangeInfo_t { .communicationManager = _edgeConfig.getCoordinationCommunicationManager(), .globalKey = encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _metadataChannelConsumerPayloadBufferKey), .memorySlot = _metadataChannelPayloadBuffer } ); + memorySlots.push_back(memorySlotExchangeInfo_t{ + .communicationManager = _edgeConfig.getCoordinationCommunicationManager(), + .globalKey = encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _metadataChannelConsumerCoordinationBufferKey), + .memorySlot = _metadataChannelLocalCoordinationBuffer}); + memorySlots.push_back(memorySlotExchangeInfo_t{ + .communicationManager = _edgeConfig.getCoordinationCommunicationManager(), + .globalKey = encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _metadataChannelConsumerPayloadBufferKey), + .memorySlot = _metadataChannelPayloadBuffer}); } // Function to check for incoming messages in the edge __INLINE__ bool hasMessage() const - { + { _metadataChannel->updateDepth(); // printf("Input Metadata Depth: %lu\n", _metadataChannel->getDepth()); if (_metadataChannel->isEmpty() == true) return false; @@ -80,24 +98,24 @@ class Input final : public Base if (_dataChannel->isEmpty() == true) return false; return true; - } + } __INLINE__ Message getMessage() const { if (hasMessage() == false) HICR_THROW_RUNTIME("Trying to get message when there is none available. This is a bug in hLLM."); // Receiving message - const auto dataBufferPtr = (uint8_t*) _dataChannel->getPayloadBufferMemorySlot()->getSourceLocalMemorySlot()->getPointer(); - const auto dataToken = _dataChannel->peek(); - const auto dataMessagePos = dataToken[0]; - const auto dataMessagePtr = &dataBufferPtr[dataMessagePos]; + const auto dataBufferPtr = (uint8_t *)_dataChannel->getPayloadBufferMemorySlot()->getSourceLocalMemorySlot()->getPointer(); + const auto dataToken = _dataChannel->peek(); + const auto dataMessagePos = dataToken[0]; + const auto dataMessagePtr = &dataBufferPtr[dataMessagePos]; const auto dataMessageSize = dataToken[1]; - const auto metadataBufferPtr = (Message::metadata_t*) _metadataChannel->getTokenBuffer()->getSourceLocalMemorySlot()->getPointer(); - const auto metadataToken = _metadataChannel->peek(); - const auto metadataMessagePos = metadataToken; - const auto metadataMessagePtr = &metadataBufferPtr[metadataMessagePos]; - const Message::metadata_t metadata = *metadataMessagePtr; + const auto metadataBufferPtr = (Message::metadata_t *)_metadataChannel->getTokenBuffer()->getSourceLocalMemorySlot()->getPointer(); + const auto metadataToken = _metadataChannel->peek(); + const auto metadataMessagePos = metadataToken; + const auto metadataMessagePtr = &metadataBufferPtr[metadataMessagePos]; + const Message::metadata_t metadata = *metadataMessagePtr; // printf("Receiving, from Pos: %lu\n", metadataMessagePos); // for(size_t i = 0; i < sizeof(hLLM::edge::Message::metadata_t); i++) printf(" 0x%2X ", ((uint8_t*)&metadata)[i]); @@ -120,30 +138,26 @@ class Input final : public Base __INLINE__ void createChannels() override { // Creating consumer data channel - _dataChannel = std::make_shared( - *_edgeConfig.getCoordinationCommunicationManager(), - *_edgeConfig.getPayloadCommunicationManager(), - _dataChannelConsumerPayloadBuffer /*payload buffer */, - _dataChannelConsumerSizesBuffer, - _dataChannelConsumerCoordinationBufferForSizes->getSourceLocalMemorySlot(), - _dataChannelConsumerCoordinationBufferForPayloads->getSourceLocalMemorySlot(), - _dataChannelProducerCoordinationBufferForSizes, - _dataChannelProducerCoordinationBufferForPayloads, - _edgeConfig.getBufferSize(), - _edgeConfig.getBufferCapacity() - ); + _dataChannel = std::make_shared(*_edgeConfig.getCoordinationCommunicationManager(), + *_edgeConfig.getPayloadCommunicationManager(), + _dataChannelConsumerPayloadBuffer /*payload buffer */, + _dataChannelConsumerSizesBuffer, + _dataChannelConsumerCoordinationBufferForSizes->getSourceLocalMemorySlot(), + _dataChannelConsumerCoordinationBufferForPayloads->getSourceLocalMemorySlot(), + _dataChannelProducerCoordinationBufferForSizes, + _dataChannelProducerCoordinationBufferForPayloads, + _edgeConfig.getBufferSize(), + _edgeConfig.getBufferCapacity()); // Creating consumer data channel - _metadataChannel = std::make_shared( - *_edgeConfig.getCoordinationCommunicationManager(), - *_edgeConfig.getCoordinationCommunicationManager(), - _metadataChannelConsumerPayloadBuffer, - _metadataChannelConsumerCoordinationBuffer->getSourceLocalMemorySlot(), - _metadataChannelProducerCoordinationBuffer, - sizeof(Message::metadata_t), - _edgeConfig.getBufferCapacity() - ); - } + _metadataChannel = std::make_shared(*_edgeConfig.getCoordinationCommunicationManager(), + *_edgeConfig.getCoordinationCommunicationManager(), + _metadataChannelConsumerPayloadBuffer, + _metadataChannelConsumerCoordinationBuffer->getSourceLocalMemorySlot(), + _metadataChannelProducerCoordinationBuffer, + sizeof(Message::metadata_t), + _edgeConfig.getBufferCapacity()); + } // Buffers associated with the data channel std::shared_ptr _dataChannelSizesBuffer; @@ -154,7 +168,7 @@ class Input final : public Base // The HiCR channels we use to communicate std::shared_ptr _dataChannel; - std::shared_ptr _metadataChannel; + std::shared_ptr _metadataChannel; }; // class Input diff --git a/include/hllm/edge/message.hpp b/include/hllm/edge/message.hpp index 6f39ad3..2efa645 100644 --- a/include/hllm/edge/message.hpp +++ b/include/hllm/edge/message.hpp @@ -5,47 +5,47 @@ namespace hLLM { - typedef uint64_t messageId_t; - typedef uint64_t sessionId_t; +typedef uint64_t messageId_t; +typedef uint64_t sessionId_t; - namespace edge - { +namespace edge +{ - class Message final - { - public: +class Message final +{ + public: + + typedef uint64_t messageType_t; - typedef uint64_t messageType_t; +#pragma pack(push, 1) + struct metadata_t + { + messageType_t type; + sessionId_t sessionId; + messageId_t messageId; + }; +#pragma pack(pop) - #pragma pack(push, 1) - struct metadata_t - { - messageType_t type; - sessionId_t sessionId; - messageId_t messageId; - }; - #pragma pack(pop) + Message() = delete; + Message(const uint8_t *const data, const size_t size, const metadata_t metadata) + : _data(data), + _size(size), + _metadata(metadata) + {} - Message() = delete; - Message(const uint8_t* const data, const size_t size, const metadata_t metadata) : - _data(data), - _size(size), - _metadata(metadata) - { } + virtual ~Message() = default; - virtual ~Message() = default; + const uint8_t *getData() const { return _data; } + size_t getSize() const { return _size; } + const metadata_t &getMetadata() const { return _metadata; } - const uint8_t* getData() const { return _data; } - size_t getSize() const { return _size; } - const metadata_t& getMetadata() const { return _metadata; } + private: - private: + const uint8_t *const _data; + const size_t _size; + const metadata_t _metadata; - const uint8_t* const _data; - const size_t _size; - const metadata_t _metadata; - - }; // class Message +}; // class Message - } // namespace edge +} // namespace edge } // namespace hLLM \ No newline at end of file diff --git a/include/hllm/edge/output.hpp b/include/hllm/edge/output.hpp index efffcb0..ebbcd40 100644 --- a/include/hllm/edge/output.hpp +++ b/include/hllm/edge/output.hpp @@ -13,15 +13,15 @@ class Output final : public Base { public: - Output(const configuration::Edge edgeConfig, - const edgeType_t edgeType, - const configuration::Edge::edgeIndex_t edgeIndex, - const configuration::Partition::partitionIndex_t producerPartitionIndex, - const configuration::Partition::partitionIndex_t consumerPartitionIndex, - const configuration::Replica::replicaIndex_t replicaIndex) : - Base(edgeConfig, edgeType, edgeIndex, producerPartitionIndex, consumerPartitionIndex, replicaIndex) + Output(const configuration::Edge edgeConfig, + const edgeType_t edgeType, + const configuration::Edge::edgeIndex_t edgeIndex, + const configuration::Partition::partitionIndex_t producerPartitionIndex, + const configuration::Partition::partitionIndex_t consumerPartitionIndex, + const configuration::Replica::replicaIndex_t replicaIndex) + : Base(edgeConfig, edgeType, edgeIndex, producerPartitionIndex, consumerPartitionIndex, replicaIndex) { - _dataChannelProducerSizeInfoBuffer = _edgeConfig.getCoordinationMemoryManager()->allocateLocalMemorySlot(_edgeConfig.getCoordinationMemorySpace(), sizeof(size_t)); + _dataChannelProducerSizeInfoBuffer = _edgeConfig.getCoordinationMemoryManager()->allocateLocalMemorySlot(_edgeConfig.getCoordinationMemorySpace(), sizeof(size_t)); } ~Output() @@ -31,22 +31,31 @@ class Output final : public Base } public: - - __INLINE__ void getMemorySlotsToExchange(std::vector& memorySlots) const override + + __INLINE__ void getMemorySlotsToExchange(std::vector &memorySlots) const override { // printf("Exchanging %lu\n", encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _dataChannelProducerCoordinationBufferforSizesKey)); // printf("Exchanging %lu\n", encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _dataChannelProducerCoordinationBufferforPayloadKey)); // printf("Exchanging %lu\n", encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _metadataChannelProducerCoordinationBufferKey)); // Getting key / memory slot pairs - memorySlots.push_back( memorySlotExchangeInfo_t { .communicationManager = _edgeConfig.getCoordinationCommunicationManager(), .globalKey = encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _dataChannelProducerCoordinationBufferforSizesKey), .memorySlot = _dataChannelLocalCoordinationBufferForSizes } ); - memorySlots.push_back( memorySlotExchangeInfo_t { .communicationManager = _edgeConfig.getCoordinationCommunicationManager(), .globalKey = encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _dataChannelProducerCoordinationBufferforPayloadKey), .memorySlot = _dataChannelLocalCoordinationBufferForPayloads } ); - memorySlots.push_back( memorySlotExchangeInfo_t { .communicationManager = _edgeConfig.getCoordinationCommunicationManager(), .globalKey = encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _metadataChannelProducerCoordinationBufferKey), .memorySlot = _metadataChannelLocalCoordinationBuffer } ); + memorySlots.push_back(memorySlotExchangeInfo_t{ + .communicationManager = _edgeConfig.getCoordinationCommunicationManager(), + .globalKey = encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _dataChannelProducerCoordinationBufferforSizesKey), + .memorySlot = _dataChannelLocalCoordinationBufferForSizes}); + memorySlots.push_back(memorySlotExchangeInfo_t{ + .communicationManager = _edgeConfig.getCoordinationCommunicationManager(), + .globalKey = encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _dataChannelProducerCoordinationBufferforPayloadKey), + .memorySlot = _dataChannelLocalCoordinationBufferForPayloads}); + memorySlots.push_back(memorySlotExchangeInfo_t{ + .communicationManager = _edgeConfig.getCoordinationCommunicationManager(), + .globalKey = encodeGlobalKey(_edgeIndex, _producerPartitionIndex, _consumerPartitionIndex, _replicaIndex, _edgeType, _metadataChannelProducerCoordinationBufferKey), + .memorySlot = _metadataChannelLocalCoordinationBuffer}); } - // Function to check whether the output channels are full, given a message size to be sent + // Function to check whether the output channels are full, given a message size to be sent __INLINE__ bool isFull(const size_t msgSize) const - { + { // Requesting the re-check of the channel's usage _metadataChannel->updateDepth(); @@ -60,7 +69,7 @@ class Output final : public Base // Check if both are not empty return false; - } + } // This function locks and unlocks the edge until it has enough space for the message to be sent __INLINE__ void pushMessageLocking(const Message message) @@ -70,11 +79,11 @@ class Output final : public Base { lock(); isEdgeFull = isFull(message.getSize()); - if (isEdgeFull == true) unlock(); + if (isEdgeFull == true) unlock(); } pushMessage(message); - + unlock(); } @@ -82,14 +91,16 @@ class Output final : public Base { if (isFull(message.getSize()) == true) HICR_THROW_RUNTIME("Trying to push a message when channel is full. This is a bug in hLLM."); - auto messagePayloadMemorySlot = _edgeConfig.getPayloadMemoryManager()->registerLocalMemorySlot(_edgeConfig.getPayloadMemorySpace(), (void*)message.getData(), message.getSize()); + auto messagePayloadMemorySlot = + _edgeConfig.getPayloadMemoryManager()->registerLocalMemorySlot(_edgeConfig.getPayloadMemorySpace(), (void *)message.getData(), message.getSize()); _dataChannel->push(messagePayloadMemorySlot); // auto metadata = message.getMetadata(); // printf("Pushing: \n"); // for(size_t i = 0; i < sizeof(hLLM::edge::Message::metadata_t); i++) printf(" 0x%2X ", ((uint8_t*)&metadata)[i]); // printf("\n"); - auto messageMetadataMemorySlot = _edgeConfig.getCoordinationMemoryManager()->registerLocalMemorySlot(_edgeConfig.getCoordinationMemorySpace(), (void*)&message.getMetadata(), sizeof(Message::metadata_t)); + auto messageMetadataMemorySlot = + _edgeConfig.getCoordinationMemoryManager()->registerLocalMemorySlot(_edgeConfig.getCoordinationMemorySpace(), (void *)&message.getMetadata(), sizeof(Message::metadata_t)); _metadataChannel->push(messageMetadataMemorySlot); _edgeConfig.getPayloadMemoryManager()->deregisterLocalMemorySlot(messagePayloadMemorySlot); @@ -103,31 +114,27 @@ class Output final : public Base __INLINE__ void createChannels() override { // Creating producer data channel - _dataChannel = std::make_shared( - *_edgeConfig.getCoordinationCommunicationManager(), - *_edgeConfig.getPayloadCommunicationManager(), - _dataChannelProducerSizeInfoBuffer, - _dataChannelConsumerPayloadBuffer, - _dataChannelConsumerSizesBuffer, - _dataChannelProducerCoordinationBufferForSizes->getSourceLocalMemorySlot(), - _dataChannelProducerCoordinationBufferForPayloads->getSourceLocalMemorySlot(), - _dataChannelConsumerCoordinationBufferForSizes, - _dataChannelConsumerCoordinationBufferForPayloads, - _edgeConfig.getBufferSize(), - sizeof(uint8_t), - _edgeConfig.getBufferCapacity() - ); + _dataChannel = std::make_shared(*_edgeConfig.getCoordinationCommunicationManager(), + *_edgeConfig.getPayloadCommunicationManager(), + _dataChannelProducerSizeInfoBuffer, + _dataChannelConsumerPayloadBuffer, + _dataChannelConsumerSizesBuffer, + _dataChannelProducerCoordinationBufferForSizes->getSourceLocalMemorySlot(), + _dataChannelProducerCoordinationBufferForPayloads->getSourceLocalMemorySlot(), + _dataChannelConsumerCoordinationBufferForSizes, + _dataChannelConsumerCoordinationBufferForPayloads, + _edgeConfig.getBufferSize(), + sizeof(uint8_t), + _edgeConfig.getBufferCapacity()); // Creating producer metadata channel - _metadataChannel = std::make_shared( - *_edgeConfig.getCoordinationCommunicationManager(), - *_edgeConfig.getCoordinationCommunicationManager(), - _metadataChannelConsumerPayloadBuffer, - _metadataChannelProducerCoordinationBuffer->getSourceLocalMemorySlot(), - _metadataChannelConsumerCoordinationBuffer, - sizeof(Message::metadata_t), - _edgeConfig.getBufferCapacity() - ); + _metadataChannel = std::make_shared(*_edgeConfig.getCoordinationCommunicationManager(), + *_edgeConfig.getCoordinationCommunicationManager(), + _metadataChannelConsumerPayloadBuffer, + _metadataChannelProducerCoordinationBuffer->getSourceLocalMemorySlot(), + _metadataChannelConsumerCoordinationBuffer, + sizeof(Message::metadata_t), + _edgeConfig.getBufferCapacity()); } // Internal memory slot for data channel producer coordination buffer @@ -135,8 +142,8 @@ class Output final : public Base // The HiCR channels we use to communicate std::shared_ptr _dataChannel; - std::shared_ptr _metadataChannel; - + std::shared_ptr _metadataChannel; + }; // class Output } // namespace hLLM::edge \ No newline at end of file diff --git a/include/hllm/engine.hpp b/include/hllm/engine.hpp index a7bb47b..7f0dfc7 100644 --- a/include/hllm/engine.hpp +++ b/include/hllm/engine.hpp @@ -25,9 +25,9 @@ class Engine final { public: - Engine(HiCR::InstanceManager *instanceManager, - HiCR::frontend::RPCEngine *rpcEngine, - taskr::Runtime *taskr, + Engine(HiCR::InstanceManager *instanceManager, + HiCR::frontend::RPCEngine *rpcEngine, + taskr::Runtime *taskr, const HiCR::GlobalMemorySlot::tag_t exchangeTag = __HLLM_DEFAULT_EXCHANGE_TAG) : _instanceManager(instanceManager), _rpcEngine(rpcEngine), @@ -36,10 +36,11 @@ class Engine final _exchangeTag(exchangeTag) { // Registering entry point function for partition coordinators / replicas. Not for the deployment launcher - _rpcEngine->addRPCTarget(__HLLM_WORKER_ENTRY_POINT_RPC_NAME, HiCR::backend::pthreads::ComputeManager::createExecutionUnit([this](void*) { entryPoint(); })); + _rpcEngine->addRPCTarget(__HLLM_WORKER_ENTRY_POINT_RPC_NAME, HiCR::backend::pthreads::ComputeManager::createExecutionUnit([this](void *) { entryPoint(); })); // Registering deployment information request - _rpcEngine->addRPCTarget(__HLLM_REQUEST_DEPLOYMENT_CONFIGURATION_RPC_NAME, HiCR::backend::pthreads::ComputeManager::createExecutionUnit([this](void *) { attendDeploymentConfigurationRequest(); })); + _rpcEngine->addRPCTarget(__HLLM_REQUEST_DEPLOYMENT_CONFIGURATION_RPC_NAME, + HiCR::backend::pthreads::ComputeManager::createExecutionUnit([this](void *) { attendDeploymentConfigurationRequest(); })); // Registering finalization function (for root to execute) _rpcEngine->addRPCTarget(__HLLM_BROADCAST_DEPLOYMENT_STOP_RPC_NAME, HiCR::backend::pthreads::ComputeManager::createExecutionUnit([this](void *) { doLocalTermination(); })); @@ -101,7 +102,7 @@ class Engine final _deployment = deployment; // Getting the instances involved in the deployment (only relevant for the deployer instance) - for (const auto& partition : _deployment.getPartitions()) + for (const auto &partition : _deployment.getPartitions()) { // Getting the instance id assigned to the partition coordinator const auto coordinatorInstanceId = partition->getCoordinatorInstanceId(); @@ -110,7 +111,7 @@ class Engine final _instanceSet.insert(coordinatorInstanceId); // Now getting all replicas for the replica - for (const auto& replica : partition->getReplicas()) + for (const auto &replica : partition->getReplicas()) { // Getting the instance id assigned to the replica const auto replicaInstanceId = replica->getInstanceId(); @@ -125,14 +126,18 @@ class Engine final if (instanceId != _deployerInstanceId) // If it's not me, listen for a deployment configuration request { printf("Deployer %lu: Listening for instance %lu\n", _instanceId, instanceId); - _rpcEngine->listen(); + _rpcEngine->listen(); } } __INLINE__ void deploy(const configuration::Deployment deployment) { // If I am not the deployer instance await for the deployment launcher to request us to start - if (_instanceId != _deployerInstanceId) { _rpcEngine->listen(); return; } + if (_instanceId != _deployerInstanceId) + { + _rpcEngine->listen(); + return; + } // Considering whether the deployment launcher is actually part of the deployment bool isLauncherInDeployment = false; @@ -177,27 +182,27 @@ class Engine final { const auto ¤tInstance = *_instanceManager->getCurrentInstance(); - // If I am the deployer instance, + // If I am the deployer instance, if (currentInstance.getId() == _deployerInstanceId) { - printf("[hLLM] Deployer instance %lu finalizing hLLM...\n", currentInstance.getId()); + printf("[hLLM] Deployer instance %lu finalizing hLLM...\n", currentInstance.getId()); + + // Broadcast termination to others (and myself) + broadcastTermination(); - // Broadcast termination to others (and myself) - broadcastTermination(); - - // (deployer) Executing local termination myself now - doLocalTermination(); + // (deployer) Executing local termination myself now + doLocalTermination(); - // Return - return; - } + // Return + return; + } // If I am not the deployer instance, request the deployer to please broadcast terminationp printf("[hLLM] Instance %lu requesting deployer instance %lu to finish execution.\n", currentInstance.getId(), _deployerInstanceId); _rpcEngine->requestRPC(_deployerInstanceId, __HLLM_REQUEST_DEPLOYMENT_STOP_RPC_NAME); } - [[nodiscard]] __INLINE__ auto& getDeployment() { return _deployment; } + [[nodiscard]] __INLINE__ auto &getDeployment() { return _deployment; } [[nodiscard]] __INLINE__ size_t getPartitionIdx() const { return _partitionIdx; } [[nodiscard]] __INLINE__ std::shared_ptr createSession() @@ -264,7 +269,7 @@ class Engine final __INLINE__ void entryPoint() { // Indexes corresponding to the roles assigned to this instance - std::vector coordinatorRoleIndexes; + std::vector coordinatorRoleIndexes; std::vector> replicaRoleIndexes; // Sanity checks on the deployment object @@ -294,7 +299,7 @@ class Engine final } // If I am a partition coordinator, construct the coordinator object - for (const auto& coordinatorRoleIndex : coordinatorRoleIndexes) + for (const auto &coordinatorRoleIndex : coordinatorRoleIndexes) { printf("[Instance %lu] I am a partition %lu coordinator\n", _instanceId, coordinatorRoleIndex); auto coordinatorRole = std::make_shared(_deployment, coordinatorRoleIndex, _taskr); @@ -305,7 +310,7 @@ class Engine final // If I am a replica, construct the replica object: // Note: An instance can be simultaneously a partition coordinator and a replica - for (const auto& replicaRoleIndex : replicaRoleIndexes) + for (const auto &replicaRoleIndex : replicaRoleIndexes) { printf("[Instance %lu] I am a partition %lu replica %lu\n", _instanceId, replicaRoleIndex.first, replicaRoleIndex.second); auto replicaRole = std::make_shared(_deployment, replicaRoleIndex.first, replicaRoleIndex.second, _taskr, _registeredFunctions); @@ -328,21 +333,21 @@ class Engine final if (_instanceRoles.empty()) HICR_THROW_RUNTIME("Instance %lu is involved in the deployment but no role has been asigned to it.", _instanceId); ////////// Exchange memory slots now - - // Storage for the initial set of HiCR memory slots to exchange for the creation of edges. + + // Storage for the initial set of HiCR memory slots to exchange for the creation of edges. // This is a low-level aspect that normally shouldn't be exposed at this level, but it is required // for all partitions to partitipate since we still don't support peer-to-peer memory slot exchange std::vector memorySlotsToExchange; // Getting memory slots to exchange - for (const auto& role : _instanceRoles) role->getMemorySlotsToExchange(memorySlotsToExchange); + for (const auto &role : _instanceRoles) role->getMemorySlotsToExchange(memorySlotsToExchange); // printf("[Instance %lu] Memory Slots to exchange: %lu\n", _instanceId, memorySlotsToExchange.size()); // Finding all distinct communication managers and storing them in the order in which they were declared. // This is important for all intervening instances to do the exchange in the same order - std::set communicationManagerSet; - std::vector communicationManagerVector; + std::set communicationManagerSet; + std::vector communicationManagerVector; // Adding control communication manager const auto controlCommunicationManager = _deployment.getControlBuffer().communicationManager; @@ -350,7 +355,7 @@ class Engine final communicationManagerVector.push_back(controlCommunicationManager); // Adding edge-specific communication managers - for (const auto& edge : _deployment.getEdges()) + for (const auto &edge : _deployment.getEdges()) { const auto coordinationComunicationManager = edge->getCoordinationCommunicationManager(); if (communicationManagerSet.contains(coordinationComunicationManager) == false) @@ -368,11 +373,11 @@ class Engine final } // Now creating a map of memory slots to exchange, mapped by communication manager - std::map> exchangeMap; - for (const auto& entry : memorySlotsToExchange) + std::map> exchangeMap; + for (const auto &entry : memorySlotsToExchange) { // Getting the communication manager used for this memory slot - const auto& communicationManager = entry.communicationManager; + const auto &communicationManager = entry.communicationManager; // Sanity check if (communicationManagerSet.contains(communicationManager) == false) HICR_THROW_RUNTIME("Could not find communication manager in the set. This is a bug in hLLM"); @@ -380,7 +385,7 @@ class Engine final // Adding memory slot to the exchange map // printf("Exchanging Memory Slot with key: %lu\n", entry.globalKey); exchangeMap[communicationManager].push_back(HiCR::CommunicationManager::globalKeyMemorySlotPair_t(entry.globalKey, entry.memorySlot)); - } + } // Finally, doing the exchange, one communication manager at a time, in the order given by the edge ordering printf("[Instance %lu] Exchanging Memory Slots...\n", _instanceId); @@ -390,19 +395,19 @@ class Engine final for (const auto communicationManager : communicationManagerVector) communicationManager->fence(_exchangeTag); // After the exchange, we can now initialize the edges - for (const auto& role : _instanceRoles) role->initializeEdges(_exchangeTag); + for (const auto &role : _instanceRoles) role->initializeEdges(_exchangeTag); // Initializing TaskR _taskr->initialize(); // Initializing roles - for (const auto& role : _instanceRoles) role->initialize(); + for (const auto &role : _instanceRoles) role->initialize(); // Instruct TaskR to re-add suspended tasks _taskr->setTaskCallbackHandler(HiCR::tasking::Task::callback_t::onTaskSuspend, [&](taskr::Task *task) { _taskr->resumeTask(task); }); // Release task memory upon finalization - _taskr->setTaskCallbackHandler(HiCR::tasking::Task::callback_t::onTaskFinish, [&](taskr::Task *task) { delete (hLLM::Task*)task; }); + _taskr->setTaskCallbackHandler(HiCR::tasking::Task::callback_t::onTaskFinish, [&](taskr::Task *task) { delete (hLLM::Task *)task; }); // The engine is now fully deployed _isDeployed = true; @@ -424,13 +429,13 @@ class Engine final // For every new partition instance created, we send it the serialized deployment configuration __INLINE__ void attendDeploymentConfigurationRequest() { - printf("[Deployer Instance %lu] Received request to send deployment configuration...\n", _instanceId); - - // Serializing - const auto serializedDeployment = _deployment.serialize().dump(); + printf("[Deployer Instance %lu] Received request to send deployment configuration...\n", _instanceId); + + // Serializing + const auto serializedDeployment = _deployment.serialize().dump(); - // Returning serialized topology - _rpcEngine->submitReturnValue((void *)serializedDeployment.c_str(), serializedDeployment.size() + 1); + // Returning serialized topology + _rpcEngine->submitReturnValue((void *)serializedDeployment.c_str(), serializedDeployment.size() + 1); } // Pointer to the instance's roles assigned to this instance @@ -458,7 +463,7 @@ class Engine final HiCR::frontend::RPCEngine *_rpcEngine; // TaskR instance - taskr::Runtime* const _taskr; + taskr::Runtime *const _taskr; // My instance Id const HiCR::Instance::instanceId_t _instanceId; diff --git a/include/hllm/messages/base.hpp b/include/hllm/messages/base.hpp index e63f900..d17612d 100644 --- a/include/hllm/messages/base.hpp +++ b/include/hllm/messages/base.hpp @@ -11,9 +11,9 @@ namespace hLLM::messages #define __HLLM__BASE_MESSAGE_ID__ 128 enum messageTypes : edge::Message::messageType_t { - heartbeat = __HLLM__BASE_MESSAGE_ID__ + 0, - prompt = __HLLM__BASE_MESSAGE_ID__ + 1, - data = __HLLM__BASE_MESSAGE_ID__ + 2, + heartbeat = __HLLM__BASE_MESSAGE_ID__ + 0, + prompt = __HLLM__BASE_MESSAGE_ID__ + 1, + data = __HLLM__BASE_MESSAGE_ID__ + 2, replicaReady = __HLLM__BASE_MESSAGE_ID__ + 3 }; @@ -21,14 +21,14 @@ class Base { public: - virtual void decode(const edge::Message& rawMessage) = 0; - virtual edge::Message encode() const = 0; - virtual edge::Message::messageType_t getType() const = 0; + virtual void decode(const edge::Message &rawMessage) = 0; + virtual edge::Message encode() const = 0; + virtual edge::Message::messageType_t getType() const = 0; protected: - Base() = default; + Base() = default; virtual ~Base() = default; }; // class Base -} // namespace hLLM \ No newline at end of file +} // namespace hLLM::messages \ No newline at end of file diff --git a/include/hllm/messages/data.hpp b/include/hllm/messages/data.hpp index c4de20c..b8480d0 100644 --- a/include/hllm/messages/data.hpp +++ b/include/hllm/messages/data.hpp @@ -15,44 +15,46 @@ class Data final : public Base public: Data() = default; - Data(const edge::Message& rawMessage) : Base() + Data(const edge::Message &rawMessage) + : Base() { decode(rawMessage); } - Data(const uint8_t* data, const size_t size, Prompt::promptId_t promptId) : - Base(), - _data(data), - _size(size), - _promptId(promptId) { } + Data(const uint8_t *data, const size_t size, Prompt::promptId_t promptId) + : Base(), + _data(data), + _size(size), + _promptId(promptId) + {} ~Data() = default; __INLINE__ edge::Message::messageType_t getType() const override { return _type; } - __INLINE__ void decode(const edge::Message& rawMessage) override + __INLINE__ void decode(const edge::Message &rawMessage) override { const auto messageType = rawMessage.getMetadata().type; if (messageType != getType()) HICR_THROW_RUNTIME("Message type %lu being decoded by class of type %lu. This is a bug in hLLM", messageType, getType()); - _data = rawMessage.getData(); - _size = rawMessage.getSize(); - _promptId = { rawMessage.getMetadata().sessionId, rawMessage.getMetadata().messageId }; + _data = rawMessage.getData(); + _size = rawMessage.getSize(); + _promptId = {rawMessage.getMetadata().sessionId, rawMessage.getMetadata().messageId}; } __INLINE__ edge::Message encode() const override - { - edge::Message rawMessage(_data, _size, edge::Message::metadata_t( { .type = getType(), .sessionId = _promptId.first, .messageId = _promptId.second })); + { + edge::Message rawMessage(_data, _size, edge::Message::metadata_t({.type = getType(), .sessionId = _promptId.first, .messageId = _promptId.second})); return rawMessage; } - __INLINE__ const uint8_t* getData() const { return _data; } - __INLINE__ const size_t getSize() const { return _size; } + __INLINE__ const uint8_t *getData() const { return _data; } + __INLINE__ const size_t getSize() const { return _size; } __INLINE__ const Prompt::promptId_t getPromptId() const { return _promptId; } private: - - const uint8_t* _data; - size_t _size; + + const uint8_t *_data; + size_t _size; Prompt::promptId_t _promptId; }; // class Prompt diff --git a/include/hllm/messages/heartbeat.hpp b/include/hllm/messages/heartbeat.hpp index 2d04e4a..8ab262d 100644 --- a/include/hllm/messages/heartbeat.hpp +++ b/include/hllm/messages/heartbeat.hpp @@ -9,13 +9,14 @@ class Heartbeat final : public Base { private: - const edge::Message::messageType_t _type = messageTypes::heartbeat; - const std::string _signature = "Heartbeat"; + const edge::Message::messageType_t _type = messageTypes::heartbeat; + const std::string _signature = "Heartbeat"; public: Heartbeat() = default; - Heartbeat(const edge::Message& rawMessage) : Base() + Heartbeat(const edge::Message &rawMessage) + : Base() { decode(rawMessage); } @@ -23,26 +24,27 @@ class Heartbeat final : public Base __INLINE__ edge::Message::messageType_t getType() const override { return _type; } - __INLINE__ void decode(const edge::Message& rawMessage) override + __INLINE__ void decode(const edge::Message &rawMessage) override { const auto messageType = rawMessage.getMetadata().type; if (messageType != getType()) HICR_THROW_RUNTIME("Message type %lu being decoded by class of type %lu. This is a bug in hLLM", messageType, getType()); - const auto signatureSize = rawMessage.getSize(); - const std::string signature = std::string((const char*)rawMessage.getData()); - if (signatureSize != _signature.size() + 1) HICR_THROW_RUNTIME("Heartbeat message has signature incorrect size (%lu != %lu, Recevied: '%s'). This is a bug in hLLM", signatureSize, _signature.size() + 1, signature.c_str()); + const auto signatureSize = rawMessage.getSize(); + const std::string signature = std::string((const char *)rawMessage.getData()); + if (signatureSize != _signature.size() + 1) + HICR_THROW_RUNTIME( + "Heartbeat message has signature incorrect size (%lu != %lu, Recevied: '%s'). This is a bug in hLLM", signatureSize, _signature.size() + 1, signature.c_str()); if (signature != _signature) HICR_THROW_RUNTIME("Heartbeat message has incorrect signature ('%s' != '%s'). This is a bug in hLLM", signature.c_str(), _signature.c_str()); } __INLINE__ edge::Message encode() const override - { - edge::Message rawMessage((uint8_t*)_signature.data(), _signature.size()+1, edge::Message::metadata_t( { .type = getType(), .sessionId = 0, .messageId = 0 })); + { + edge::Message rawMessage((uint8_t *)_signature.data(), _signature.size() + 1, edge::Message::metadata_t({.type = getType(), .sessionId = 0, .messageId = 0})); return rawMessage; } private: - }; // class Heartbeat } // namespace hLLM::messages \ No newline at end of file diff --git a/include/hllm/messages/prompt.hpp b/include/hllm/messages/prompt.hpp index 6f46adb..49bcfa5 100644 --- a/include/hllm/messages/prompt.hpp +++ b/include/hllm/messages/prompt.hpp @@ -14,42 +14,44 @@ class Prompt final : public Base public: Prompt() = default; - Prompt(const edge::Message& rawMessage) : Base() + Prompt(const edge::Message &rawMessage) + : Base() { decode(rawMessage); } - Prompt(const std::string& input, sessionId_t sessionId, messageId_t messageId) : - Base(), - _input(input), - _sessionId(sessionId), - _messageId(messageId) { } + Prompt(const std::string &input, sessionId_t sessionId, messageId_t messageId) + : Base(), + _input(input), + _sessionId(sessionId), + _messageId(messageId) + {} ~Prompt() = default; __INLINE__ edge::Message::messageType_t getType() const override { return _type; } - __INLINE__ void decode(const edge::Message& rawMessage) override + __INLINE__ void decode(const edge::Message &rawMessage) override { const auto messageType = rawMessage.getMetadata().type; if (messageType != getType()) HICR_THROW_RUNTIME("Message type %lu being decoded by class of type %lu. This is a bug in hLLM", messageType, getType()); _sessionId = rawMessage.getMetadata().sessionId; _messageId = rawMessage.getMetadata().messageId; - _input = std::string((const char*)rawMessage.getData(), rawMessage.getSize()); + _input = std::string((const char *)rawMessage.getData(), rawMessage.getSize()); } __INLINE__ edge::Message encode() const override - { - edge::Message rawMessage((uint8_t*)_input.data(), _input.size()+1, edge::Message::metadata_t( { .type = getType(), .sessionId = _sessionId, .messageId = _messageId })); + { + edge::Message rawMessage((uint8_t *)_input.data(), _input.size() + 1, edge::Message::metadata_t({.type = getType(), .sessionId = _sessionId, .messageId = _messageId})); return rawMessage; } - __INLINE__ const std::string& getInput() const { return _input; } - __INLINE__ const sessionId_t getSessionId() const { return _sessionId; } - __INLINE__ const messageId_t getMessageId() const { return _messageId; } + __INLINE__ const std::string &getInput() const { return _input; } + __INLINE__ const sessionId_t getSessionId() const { return _sessionId; } + __INLINE__ const messageId_t getMessageId() const { return _messageId; } private: - + std::string _input; sessionId_t _sessionId; messageId_t _messageId; diff --git a/include/hllm/messages/replicaReady.hpp b/include/hllm/messages/replicaReady.hpp index a581f45..eb79411 100644 --- a/include/hllm/messages/replicaReady.hpp +++ b/include/hllm/messages/replicaReady.hpp @@ -13,28 +13,28 @@ class ReplicaReady final : public Base public: - ReplicaReady(const edge::Message& rawMessage) : Base() + ReplicaReady(const edge::Message &rawMessage) + : Base() { decode(rawMessage); } - ReplicaReady() : Base() { } + ReplicaReady() + : Base() + {} ~ReplicaReady() = default; __INLINE__ edge::Message::messageType_t getType() const override { return _type; } - __INLINE__ void decode(const edge::Message& rawMessage) override - { - } + __INLINE__ void decode(const edge::Message &rawMessage) override {} __INLINE__ edge::Message encode() const override - { - edge::Message rawMessage(nullptr, 0, edge::Message::metadata_t( { .type = getType(), .sessionId = 0, .messageId = 0 })); + { + edge::Message rawMessage(nullptr, 0, edge::Message::metadata_t({.type = getType(), .sessionId = 0, .messageId = 0})); return rawMessage; } private: - }; // class ReplicaReady } // namespace hLLM::messages \ No newline at end of file diff --git a/include/hllm/prompt.hpp b/include/hllm/prompt.hpp index fe34cf9..6cf4b67 100644 --- a/include/hllm/prompt.hpp +++ b/include/hllm/prompt.hpp @@ -6,9 +6,9 @@ namespace hLLM { -namespace roles +namespace roles { - class RequestManager; +class RequestManager; } class Prompt @@ -17,30 +17,33 @@ class Prompt friend class roles::RequestManager; - typedef std::pair promptId_t; + typedef std::pair promptId_t; - Prompt() = delete; + Prompt() = delete; ~Prompt() = default; - Prompt(const promptId_t promptId, const std::string& prompt) : - _promptId(promptId), - _prompt(prompt) - { - } + Prompt(const promptId_t promptId, const std::string &prompt) + : _promptId(promptId), + _prompt(prompt) + {} - [[nodiscard]] __INLINE__ auto hasResponse() const { return _hasResponse; } - [[nodiscard]] __INLINE__ const std::string& getResponse() const { return _response; } - [[nodiscard]] __INLINE__ promptId_t getPromptId() const { return _promptId; } - [[nodiscard]] __INLINE__ const std::string& getPrompt() const { return _prompt; } + [[nodiscard]] __INLINE__ auto hasResponse() const { return _hasResponse; } + [[nodiscard]] __INLINE__ const std::string &getResponse() const { return _response; } + [[nodiscard]] __INLINE__ promptId_t getPromptId() const { return _promptId; } + [[nodiscard]] __INLINE__ const std::string &getPrompt() const { return _prompt; } private: - __INLINE__ void setResponse(const std::string& response) { _response = response; _hasResponse = true; } + __INLINE__ void setResponse(const std::string &response) + { + _response = response; + _hasResponse = true; + } - const promptId_t _promptId; + const promptId_t _promptId; const std::string _prompt; - std::string _response; - volatile bool _hasResponse = false; + std::string _response; + volatile bool _hasResponse = false; }; // class Prompt diff --git a/include/hllm/realTimeAnalysis.hpp b/include/hllm/realTimeAnalysis.hpp index 38441ed..32cd822 100644 --- a/include/hllm/realTimeAnalysis.hpp +++ b/include/hllm/realTimeAnalysis.hpp @@ -1,11 +1,11 @@ #pragma once -#include "../../extern/cpp-httplib/httplib.h" // Lightweight HTTP server library -#include // For atomic variables (thread-safe counters, flags) -#include // For timing utilities (steady clock, seconds, etc.) -#include // For std::mutex and std::lock_guard (thread safety) -#include // For storing the json file -#include // For debugging +#include "../../extern/cpp-httplib/httplib.h" // Lightweight HTTP server library +#include // For atomic variables (thread-safe counters, flags) +#include // For timing utilities (steady clock, seconds, etc.) +#include // For std::mutex and std::lock_guard (thread safety) +#include // For storing the json file +#include // For debugging #include #include @@ -18,23 +18,27 @@ namespace hLLM * A function for a client passing its information * (PROTOTYPE) */ -void clientPost(const size_t& InstanceId, const int& n_requests, httplib::Client cli) +void clientPost(const size_t &InstanceId, const int &n_requests, httplib::Client cli) { // Preparing the data to be passed over. It should only consist of a part of the whole json file - std::string json_data = - "{\n" - " \"Instance\": {\n" - " \"instance ID\": " + std::to_string(InstanceId) + ",\n" - " \"status\": \"active\",\n" - " \"number of requests per ms\": " + std::to_string(n_requests) + "\n" - " }\n" - "}"; + std::string json_data = "{\n" + " \"Instance\": {\n" + " \"instance ID\": " + + std::to_string(InstanceId) + + ",\n" + " \"status\": \"active\",\n" + " \"number of requests per ms\": " + + std::to_string(n_requests) + + "\n" + " }\n" + "}"; // Send Post of the new json_data auto res = cli.Post("/data", json_data, "application/json"); // Handle server response or connection failure - if (!res) { + if (!res) + { // Print an error message if connection failed std::cerr << "Failed to connect to server.\n"; } @@ -44,43 +48,42 @@ class RealTimeAnalysis { public: - RealTimeAnalysis(const std::string ip = "0.0.0.0", const size_t port = 5003) : _ip(ip), _port(port) + RealTimeAnalysis(const std::string ip = "0.0.0.0", const size_t port = 5003) + : _ip(ip), + _port(port) { _svr.Post("/data", [this](const httplib::Request &req, httplib::Response &res) { - { - // Acquire lock before modifying the shared global variable. - // The lock is automatically released when this scope ends. - std::lock_guard lock(_mtx); - _last_received_json = req.body; - } - - // (Debugging) - // Return an acknowledgment response to the client. - // This shows that the server successfully processed the POST. - // res.set_content("{\"status\":\"ok\"}", "application/json"); + { + // Acquire lock before modifying the shared global variable. + // The lock is automatically released when this scope ends. + std::lock_guard lock(_mtx); + _last_received_json = req.body; + } + + // (Debugging) + // Return an acknowledgment response to the client. + // This shows that the server successfully processed the POST. + // res.set_content("{\"status\":\"ok\"}", "application/json"); }); // Get method of posting the captured json file _svr.Get("/", [this](const httplib::Request &, httplib::Response &res) { - // Acquire the same mutex before reading shared state. - std::lock_guard lock(_mtx); + // Acquire the same mutex before reading shared state. + std::lock_guard lock(_mtx); - // Send HTML response to the browser. - res.set_content(_last_received_json, "application/json"); + // Send HTML response to the browser. + res.set_content(_last_received_json, "application/json"); }); // Start listening from a separate thread // Default: Listen on all available network interfaces (0.0.0.0) at port 5003. - _srv_thread = std::thread([this]() { - _svr.listen(_ip, _port); - }); + _srv_thread = std::thread([this]() { _svr.listen(_ip, _port); }); } ~RealTimeAnalysis() { - _svr.stop(); // tell the server to stop listening - if (_srv_thread.joinable()) - _srv_thread.join(); // clean shutdown + _svr.stop(); // tell the server to stop listening + if (_srv_thread.joinable()) _srv_thread.join(); // clean shutdown } private: @@ -116,4 +119,4 @@ class RealTimeAnalysis std::thread _srv_thread; }; -} \ No newline at end of file +} // namespace hLLM \ No newline at end of file diff --git a/include/hllm/role.hpp b/include/hllm/role.hpp index 23883fd..442d44a 100644 --- a/include/hllm/role.hpp +++ b/include/hllm/role.hpp @@ -15,27 +15,24 @@ class Role { public: - Role() = delete; + Role() = delete; ~Role() = default; - typedef std::function, const hLLM::edge::Message&)> messageHandler_t; + typedef std::function, const hLLM::edge::Message &)> messageHandler_t; struct edgeHandlerSubscription_t { hLLM::edge::Message::messageType_t type; - std::shared_ptr edge; - messageHandler_t handler; + std::shared_ptr edge; + messageHandler_t handler; }; - Role( - const configuration::Deployment deployment, - taskr::Runtime* const taskr - ) : - _deployment(deployment), - _taskr(taskr) + Role(const configuration::Deployment deployment, taskr::Runtime *const taskr) + : _deployment(deployment), + _taskr(taskr) { // Creating control edge configuration object - const auto& controlBufferConfig = _deployment.getControlBufferConst(); - _controlEdgeConfig = std::make_shared("Control Edge", controlBufferConfig.capacity, controlBufferConfig.size); + const auto &controlBufferConfig = _deployment.getControlBufferConst(); + _controlEdgeConfig = std::make_shared("Control Edge", controlBufferConfig.capacity, controlBufferConfig.size); _controlEdgeConfig->setCoordinationCommunicationManager(controlBufferConfig.communicationManager); _controlEdgeConfig->setCoordinationMemoryManager(controlBufferConfig.memoryManager); _controlEdgeConfig->setCoordinationMemorySpace(controlBufferConfig.memorySpace); @@ -66,15 +63,15 @@ class Role } /// This function completes the initialization of the edges, after the memory slot exchanges are completed - virtual void initializeEdges(const HiCR::GlobalMemorySlot::tag_t tag) = 0; - virtual void getMemorySlotsToExchange(std::vector& memorySlots) = 0; + virtual void initializeEdges(const HiCR::GlobalMemorySlot::tag_t tag) = 0; + virtual void getMemorySlotsToExchange(std::vector &memorySlots) = 0; - protected: + protected: virtual void initializeImpl() = 0; const configuration::Deployment _deployment; - taskr::Runtime* const _taskr; + taskr::Runtime *const _taskr; // Flag indicating whether the execution must keep running __volatile__ bool _continueRunning; @@ -88,59 +85,60 @@ class Role // Function to subscribe a message handler __INLINE__ void subscribeEdgeMessageHandler(const edgeHandlerSubscription_t subscription) { - _subscribedEdges.insert(subscription.edge); - _subscriptionToHandlerMap.insert( { { subscription.edge->getEdgeIndex(), subscription.type }, subscription.handler } ); + _subscribedEdges.insert(subscription.edge); + _subscriptionToHandlerMap.insert({{subscription.edge->getEdgeIndex(), subscription.type}, subscription.handler}); } - + private: ///////////// Heartbeat sending service __INLINE__ void heartbeatService() { // Checking, for all replicas' edges, whether any of them has a pending message - const auto message = messages::Heartbeat(); + const auto message = messages::Heartbeat(); const auto rawMessage = message.encode(); - for (const auto& edge : _heartbeatOutputEdges) edge->pushMessageLocking(rawMessage); + for (const auto &edge : _heartbeatOutputEdges) edge->pushMessageLocking(rawMessage); } - taskr::Service::serviceFc_t _taskrHeartbeatServiceFunction = [this](){ this->heartbeatService(); }; - taskr::Service _taskrHeartbeatService = taskr::Service(_taskrHeartbeatServiceFunction); + taskr::Service::serviceFc_t _taskrHeartbeatServiceFunction = [this]() { this->heartbeatService(); }; + taskr::Service _taskrHeartbeatService = taskr::Service(_taskrHeartbeatServiceFunction); std::vector> _heartbeatOutputEdges; /////////// Message Hanlding Service __INLINE__ void edgeSubscriptionListeningService() { - for (const auto& edge : _subscribedEdges) - { + for (const auto &edge : _subscribedEdges) + { // Locking thread from concurrent access edge->lock(); if (edge->hasMessage()) { - // Getting message from input edge - const auto edgeIdx = edge->getEdgeIndex(); - const auto message = edge->getMessage(); - const auto messageType = message.getMetadata().type; - const auto subscriptionMapKey = std::make_pair(edgeIdx, messageType); - - // Checking if a subscription has been registered for that edge - if (_subscriptionToHandlerMap.contains(subscriptionMapKey) == false) HICR_THROW_RUNTIME("Edge Idx %lu cointains message of type %lu that has no subscribed handler.\n", edgeIdx, messageType); - - // If it is registered, get handler - const auto& handler = _subscriptionToHandlerMap.at(subscriptionMapKey); - - // Running handler - handler(edge, message); - - // Immediately disposing (popping) of message out of the edge - edge->popMessage(); + // Getting message from input edge + const auto edgeIdx = edge->getEdgeIndex(); + const auto message = edge->getMessage(); + const auto messageType = message.getMetadata().type; + const auto subscriptionMapKey = std::make_pair(edgeIdx, messageType); + + // Checking if a subscription has been registered for that edge + if (_subscriptionToHandlerMap.contains(subscriptionMapKey) == false) + HICR_THROW_RUNTIME("Edge Idx %lu cointains message of type %lu that has no subscribed handler.\n", edgeIdx, messageType); + + // If it is registered, get handler + const auto &handler = _subscriptionToHandlerMap.at(subscriptionMapKey); + + // Running handler + handler(edge, message); + + // Immediately disposing (popping) of message out of the edge + edge->popMessage(); } // Unlocking edge edge->unlock(); } } - taskr::Service::serviceFc_t _taskrEdgeSubscriptionListeningServiceFunction = [this](){ this->edgeSubscriptionListeningService(); }; - taskr::Service _taskrEdgeSubscriptionListeningService = taskr::Service(_taskrEdgeSubscriptionListeningServiceFunction, 0); + taskr::Service::serviceFc_t _taskrEdgeSubscriptionListeningServiceFunction = [this]() { this->edgeSubscriptionListeningService(); }; + taskr::Service _taskrEdgeSubscriptionListeningService = taskr::Service(_taskrEdgeSubscriptionListeningServiceFunction, 0); std::set> _subscribedEdges; std::map, messageHandler_t> _subscriptionToHandlerMap; diff --git a/include/hllm/roles/partition/base.hpp b/include/hllm/roles/partition/base.hpp index 70d9d1b..d41e60a 100644 --- a/include/hllm/roles/partition/base.hpp +++ b/include/hllm/roles/partition/base.hpp @@ -17,10 +17,10 @@ namespace hLLM::roles::partition class Base : public hLLM::Role { public: - + struct edgeInfo_t { - configuration::Edge::edgeIndex_t index; + configuration::Edge::edgeIndex_t index; std::shared_ptr config; configuration::Partition::partitionIndex_t producerPartitionIndex; configuration::Partition::partitionIndex_t consumerPartitionIndex; @@ -33,19 +33,22 @@ class Base : public hLLM::Role class Edge final { - public: + public: - Edge(const edgeInfo_t edgeInfo) : _edgeInfo(edgeInfo) {} + Edge(const edgeInfo_t edgeInfo) + : _edgeInfo(edgeInfo) + {} ~Edge() = default; - [[nodiscard]] __INLINE__ const auto& getDataSlot() const { return _dataSlot; } - [[nodiscard]] __INLINE__ const auto& getEdgeInfo() const { return _edgeInfo; } - [[nodiscard]] __INLINE__ const bool isSatisfied() const { return _isSatisfied; } + [[nodiscard]] __INLINE__ const auto &getDataSlot() const { return _dataSlot; } + [[nodiscard]] __INLINE__ const auto &getEdgeInfo() const { return _edgeInfo; } + [[nodiscard]] __INLINE__ const bool isSatisfied() const { return _isSatisfied; } __INLINE__ void setSatisfied(const bool value = true) { - if (value == _isSatisfied) HICR_THROW_RUNTIME("The specified edge has already been set as '%s' for edge '%s'. This is a bug in hLLM.", value ? "True" : "False", _edgeInfo.config->getName().c_str()); - _isSatisfied = value; + if (value == _isSatisfied) + HICR_THROW_RUNTIME("The specified edge has already been set as '%s' for edge '%s'. This is a bug in hLLM.", value ? "True" : "False", _edgeInfo.config->getName().c_str()); + _isSatisfied = value; } __INLINE__ void setDataSlot(const std::shared_ptr dataSlot) { _dataSlot = dataSlot; } @@ -68,15 +71,15 @@ class Base : public hLLM::Role } // Make a copy of the data for the provided input edge - __INLINE__ void storeDataByCopy(const uint8_t* data, const size_t size) + __INLINE__ void storeDataByCopy(const uint8_t *data, const size_t size) { // Getting relevant managers - auto edgeMemoryManager = _edgeInfo.config->getPayloadMemoryManager(); - auto edgeMemorySpace = _edgeInfo.config->getPayloadMemorySpace(); + auto edgeMemoryManager = _edgeInfo.config->getPayloadMemoryManager(); + auto edgeMemorySpace = _edgeInfo.config->getPayloadMemorySpace(); auto edgeCommunicationManager = _edgeInfo.config->getPayloadCommunicationManager(); // Registering memory slot for the incoming data - const auto srcSlot = edgeMemoryManager->registerLocalMemorySlot(edgeMemorySpace, (void*)data, size); + const auto srcSlot = edgeMemoryManager->registerLocalMemorySlot(edgeMemorySpace, (void *)data, size); // Creating new buffer for the edge's data const auto dataSlot = edgeMemoryManager->allocateLocalMemorySlot(edgeMemorySpace, size); @@ -93,71 +96,66 @@ class Base : public hLLM::Role } // Store a reference/cpoint to the data for the provided input edge - __INLINE__ void storeDataByReference(const uint8_t* data, const size_t size) + __INLINE__ void storeDataByReference(const uint8_t *data, const size_t size) { // Getting relevant managers auto edgeMemoryManager = _edgeInfo.config->getPayloadMemoryManager(); - auto edgeMemorySpace = _edgeInfo.config->getPayloadMemorySpace(); + auto edgeMemorySpace = _edgeInfo.config->getPayloadMemorySpace(); // Creating new buffer for the edge's data - const auto dataSlot = edgeMemoryManager->registerLocalMemorySlot(edgeMemorySpace, (void*)data, size); + const auto dataSlot = edgeMemoryManager->registerLocalMemorySlot(edgeMemorySpace, (void *)data, size); // Setting new data slot setDataSlot(dataSlot); } - private: + private: - const edgeInfo_t _edgeInfo; - std::shared_ptr _dataSlot = nullptr; - bool _isSatisfied = false; + const edgeInfo_t _edgeInfo; + std::shared_ptr _dataSlot = nullptr; + bool _isSatisfied = false; }; - Job() = delete; + Job() = delete; ~Job() = default; - Job(const Prompt::promptId_t promptId, - const std::vector& inputEdges, - const std::vector& outputEdges) : - _promptId(promptId) + Job(const Prompt::promptId_t promptId, const std::vector &inputEdges, const std::vector &outputEdges) + : _promptId(promptId) { // Store data and allocate buffers for input edges - for (const auto& edgeInfo : inputEdges) _inputs.push_back(Job::Edge(edgeInfo)); - for (const auto& edgeInfo : outputEdges) _outputs.push_back(Job::Edge(edgeInfo)); + for (const auto &edgeInfo : inputEdges) _inputs.push_back(Job::Edge(edgeInfo)); + for (const auto &edgeInfo : outputEdges) _outputs.push_back(Job::Edge(edgeInfo)); } - + [[nodiscard]] Prompt::promptId_t getPromptId() const { return _promptId; } - [[nodiscard]] std::vector& getInputEdges() { return _inputs; } - [[nodiscard]] std::vector& getOutputEdges() { return _outputs; } + [[nodiscard]] std::vector &getInputEdges() { return _inputs; } + [[nodiscard]] std::vector &getOutputEdges() { return _outputs; } private: const Prompt::promptId_t _promptId; - std::vector _inputs; - std::vector _outputs; + std::vector _inputs; + std::vector _outputs; }; // class Job - Base() = delete; + Base() = delete; ~Base() = default; - Base( - const configuration::Deployment deployment, - const configuration::Partition::partitionIndex_t partitionIdx, - taskr::Runtime* const taskr - ) : Role(deployment, taskr), - _partitionIdx(partitionIdx) + Base(const configuration::Deployment deployment, const configuration::Partition::partitionIndex_t partitionIdx, taskr::Runtime *const taskr) + : Role(deployment, taskr), + _partitionIdx(partitionIdx) { // Get my partition configuration - const auto& partitionConfiguration = _deployment.getPartitions()[_partitionIdx]; + const auto &partitionConfiguration = _deployment.getPartitions()[_partitionIdx]; // Get my partition name - const auto& partitionName = partitionConfiguration->getName(); + const auto &partitionName = partitionConfiguration->getName(); // Getting list of edges in the deployment - const auto& edgeConfigs = _deployment.getEdges(); + const auto &edgeConfigs = _deployment.getEdges(); // Iterating through edges by their index and creating them - size_t inputEdgeVectorPosition = 0; + size_t inputEdgeVectorPosition = 0; size_t outputEdgeVectorPosition = 0; for (configuration::Edge::edgeIndex_t edgeIdx = 0; edgeIdx < edgeConfigs.size(); edgeIdx++) { @@ -165,72 +163,74 @@ class Base : public hLLM::Role const auto edgeConfig = edgeConfigs[edgeIdx]; // Getting edge name - const auto& edgeName = edgeConfig->getName(); + const auto &edgeName = edgeConfig->getName(); //printf("Edge: '%s' - Producer: %s, Consumer: %s\n", edgeConfig->getName().c_str(), edgeConfig->getProducer().c_str(), edgeConfig->getConsumer().c_str()); // If I am a consumer in this edge and it is not a request manager output if (edgeConfig->getConsumer() == partitionName && edgeConfig->isResultEdge() == false) { // Looking for the index of the producer of the input - const auto& producerPartitionName = edgeConfig->getProducer(); - configuration::Partition::partitionIndex_t producerPartitionIdx = 0; - bool producerFound = false; - for (const auto& partition : _deployment.getPartitions()) + const auto &producerPartitionName = edgeConfig->getProducer(); + configuration::Partition::partitionIndex_t producerPartitionIdx = 0; + bool producerFound = false; + for (const auto &partition : _deployment.getPartitions()) { if (partition->getName() == producerPartitionName) { producerFound = true; break; - } + } producerPartitionIdx++; } // Sanity check - if (producerFound == false) HICR_THROW_RUNTIME("Could not find index of producer '%s' for edge '%s'. This is a bug in hLLM.", producerPartitionName.c_str(), edgeConfig->getName().c_str()); + if (producerFound == false) + HICR_THROW_RUNTIME("Could not find index of producer '%s' for edge '%s'. This is a bug in hLLM.", producerPartitionName.c_str(), edgeConfig->getName().c_str()); // Adding new entry - _inputEdges.push_back( edgeInfo_t { .index = edgeIdx, .config = edgeConfig, .producerPartitionIndex = producerPartitionIdx, .consumerPartitionIndex = _partitionIdx }); + _inputEdges.push_back(edgeInfo_t{.index = edgeIdx, .config = edgeConfig, .producerPartitionIndex = producerPartitionIdx, .consumerPartitionIndex = _partitionIdx}); // Adding map entry to link the edge index to its position _edgeIndexToVectorPositionMap[edgeIdx] = inputEdgeVectorPosition; // Increasing position inputEdgeVectorPosition++; - } + } // If I am a producer in this edge - if (edgeConfig->getProducer() == partitionName && edgeConfig->isPromptEdge() == false) + if (edgeConfig->getProducer() == partitionName && edgeConfig->isPromptEdge() == false) { // Looking for the index of the consumer of the input - const auto& consumerPartitionName = edgeConfig->getConsumer(); - configuration::Partition::partitionIndex_t consumerPartitionIdx = 0; - bool consumerFound = false; - for (const auto& partition : _deployment.getPartitions()) + const auto &consumerPartitionName = edgeConfig->getConsumer(); + configuration::Partition::partitionIndex_t consumerPartitionIdx = 0; + bool consumerFound = false; + for (const auto &partition : _deployment.getPartitions()) { if (partition->getName() == consumerPartitionName) { consumerFound = true; break; - } + } consumerPartitionIdx++; } // Sanity check - if (consumerFound == false) HICR_THROW_RUNTIME("Could not find index of consumer '%s' for edge '%s'. This is a bug in hLLM.", consumerPartitionName.c_str(), edgeConfig->getName().c_str()); + if (consumerFound == false) + HICR_THROW_RUNTIME("Could not find index of consumer '%s' for edge '%s'. This is a bug in hLLM.", consumerPartitionName.c_str(), edgeConfig->getName().c_str()); // Adding new entry - _outputEdges.push_back( edgeInfo_t { .index = edgeIdx, .config = edgeConfig, .producerPartitionIndex = _partitionIdx, .consumerPartitionIndex = consumerPartitionIdx }); + _outputEdges.push_back(edgeInfo_t{.index = edgeIdx, .config = edgeConfig, .producerPartitionIndex = _partitionIdx, .consumerPartitionIndex = consumerPartitionIdx}); // Adding map entry to link the edge index to its position _edgeIndexToVectorPositionMap[edgeIdx] = outputEdgeVectorPosition; // Increasing position outputEdgeVectorPosition++; - } + } } } - protected: + protected: const configuration::Partition::partitionIndex_t _partitionIdx; @@ -244,4 +244,4 @@ class Base : public hLLM::Role std::map _edgeIndexToVectorPositionMap; }; // class Base -} // namespace hLLM \ No newline at end of file +} // namespace hLLM::roles::partition \ No newline at end of file diff --git a/include/hllm/roles/partition/coordinator.hpp b/include/hllm/roles/partition/coordinator.hpp index 36c528c..5f4ad78 100644 --- a/include/hllm/roles/partition/coordinator.hpp +++ b/include/hllm/roles/partition/coordinator.hpp @@ -33,63 +33,64 @@ class Coordinator final : public Base public: Replica() = delete; - Replica( - const configuration::Partition::partitionIndex_t partitionIndex, - const configuration::Replica::replicaIndex_t replicaIndex, - const std::vector& coordinatorInputs, - const std::vector& coordinatorOutputs, - const std::shared_ptr controlEdgeConfig) : - _partitionIndex(partitionIndex), - _replicaIndex(replicaIndex) + Replica(const configuration::Partition::partitionIndex_t partitionIndex, + const configuration::Replica::replicaIndex_t replicaIndex, + const std::vector &coordinatorInputs, + const std::vector &coordinatorOutputs, + const std::shared_ptr controlEdgeConfig) + : _partitionIndex(partitionIndex), + _replicaIndex(replicaIndex) + { + // For every one of the coordinator inputs, we create an output edge that allows us to redirect such inputs to the replica + for (const auto &edge : coordinatorInputs) + { + const auto edgeIdx = edge.index; + const auto &edgeConfig = edge.config; + + // Create the output edges to pass this distribute the input to any of the replicas + auto newOutput = std::make_shared(*edgeConfig, edge::edgeType_t::coordinatorToReplica, edgeIdx, _partitionIndex, _partitionIndex, _replicaIndex); + _dataOutputs.push_back(newOutput); + } + + // For everyone of the coordinator outputs, we create an input edge that receives such data from the replica to be redirected to a peer coordinator + for (const auto &edge : coordinatorOutputs) { - // For every one of the coordinator inputs, we create an output edge that allows us to redirect such inputs to the replica - for (const auto& edge : coordinatorInputs) - { - const auto edgeIdx = edge.index; - const auto& edgeConfig = edge.config; - - // Create the output edges to pass this distribute the input to any of the replicas - auto newOutput = std::make_shared(*edgeConfig, edge::edgeType_t::coordinatorToReplica, edgeIdx, _partitionIndex, _partitionIndex, _replicaIndex); - _dataOutputs.push_back(newOutput); - } - - // For everyone of the coordinator outputs, we create an input edge that receives such data from the replica to be redirected to a peer coordinator - for (const auto& edge : coordinatorOutputs) - { - const auto edgeIdx = edge.index; - const auto& edgeConfig = edge.config; - - // Create the input edges to receive the replica - auto newInput = std::make_shared(*edgeConfig, edge::edgeType_t::replicaToCoordinator, edgeIdx, _partitionIndex, _partitionIndex, _replicaIndex); - _dataInputs.push_back(newInput); - } - - // Creating control edges for exchanging control operations (e.g., heartbeat, etc) - _controlInput = std::make_shared(*controlEdgeConfig, edge::edgeType_t::replicaToCoordinator, edge::Base::controlEdgeIndex, _partitionIndex, _partitionIndex, _replicaIndex); - _controlOutput = std::make_shared(*controlEdgeConfig, edge::edgeType_t::coordinatorToReplica, edge::Base::controlEdgeIndex, _partitionIndex, _partitionIndex, _replicaIndex); + const auto edgeIdx = edge.index; + const auto &edgeConfig = edge.config; + + // Create the input edges to receive the replica + auto newInput = std::make_shared(*edgeConfig, edge::edgeType_t::replicaToCoordinator, edgeIdx, _partitionIndex, _partitionIndex, _replicaIndex); + _dataInputs.push_back(newInput); } + // Creating control edges for exchanging control operations (e.g., heartbeat, etc) + _controlInput = + std::make_shared(*controlEdgeConfig, edge::edgeType_t::replicaToCoordinator, edge::Base::controlEdgeIndex, _partitionIndex, _partitionIndex, _replicaIndex); + _controlOutput = + std::make_shared(*controlEdgeConfig, edge::edgeType_t::coordinatorToReplica, edge::Base::controlEdgeIndex, _partitionIndex, _partitionIndex, _replicaIndex); + } + ~Replica() = default; - __INLINE__ void addDataInputEdge(std::shared_ptr edge) { _dataInputs.push_back(edge); } - __INLINE__ void addDataOutputEdge(std::shared_ptr edge) { _dataOutputs.push_back(edge); } - __INLINE__ const auto& getDataInputs() const { return _dataInputs; } - __INLINE__ const auto& getDataOutputs() const { return _dataOutputs; } - __INLINE__ const auto& getControlInput() const { return _controlInput; } - __INLINE__ const auto& getControlOutput() const { return _controlOutput; } + __INLINE__ void addDataInputEdge(std::shared_ptr edge) { _dataInputs.push_back(edge); } + __INLINE__ void addDataOutputEdge(std::shared_ptr edge) { _dataOutputs.push_back(edge); } + __INLINE__ const auto &getDataInputs() const { return _dataInputs; } + __INLINE__ const auto &getDataOutputs() const { return _dataOutputs; } + __INLINE__ const auto &getControlInput() const { return _controlInput; } + __INLINE__ const auto &getControlOutput() const { return _controlOutput; } - __INLINE__ void getMemorySlotsToExchange(std::vector& memorySlots) + __INLINE__ void getMemorySlotsToExchange(std::vector &memorySlots) { - for (const auto& edge : _dataInputs) edge->getMemorySlotsToExchange(memorySlots); - for (const auto& edge : _dataOutputs) edge->getMemorySlotsToExchange(memorySlots); + for (const auto &edge : _dataInputs) edge->getMemorySlotsToExchange(memorySlots); + for (const auto &edge : _dataOutputs) edge->getMemorySlotsToExchange(memorySlots); _controlInput->getMemorySlotsToExchange(memorySlots); _controlOutput->getMemorySlotsToExchange(memorySlots); } __INLINE__ void initializeEdges(const HiCR::GlobalMemorySlot::tag_t tag) { - for (const auto& edge : _dataInputs) edge->initialize(tag); - for (const auto& edge : _dataOutputs) edge->initialize(tag); + for (const auto &edge : _dataInputs) edge->initialize(tag); + for (const auto &edge : _dataOutputs) edge->initialize(tag); _controlInput->initialize(tag); _controlOutput->initialize(tag); } @@ -98,39 +99,35 @@ class Coordinator final : public Base private: - const configuration::Partition::partitionIndex_t _partitionIndex; - const configuration::Replica::replicaIndex_t _replicaIndex; + const configuration::Partition::partitionIndex_t _partitionIndex; + const configuration::Replica::replicaIndex_t _replicaIndex; // Data Input / Output edges to/from this coordinator<->replica - std::vector> _dataInputs; + std::vector> _dataInputs; std::vector> _dataOutputs; // Control Input / Output edges to/from this coordinator<->replica - std::shared_ptr _controlInput; + std::shared_ptr _controlInput; std::shared_ptr _controlOutput; }; // class Replica public: - Coordinator() = delete; + Coordinator() = delete; ~Coordinator() = default; - Coordinator( - const configuration::Deployment deployment, - const configuration::Partition::partitionIndex_t partitionIdx, - taskr::Runtime* const taskr - ) : - Base(deployment, partitionIdx, taskr) + Coordinator(const configuration::Deployment deployment, const configuration::Partition::partitionIndex_t partitionIdx, taskr::Runtime *const taskr) + : Base(deployment, partitionIdx, taskr) { // Get my partition configuration - const auto& partitionConfiguration = _deployment.getPartitions()[_partitionIdx]; + const auto &partitionConfiguration = _deployment.getPartitions()[_partitionIdx]; // Get my partition name - const auto& partitionName = partitionConfiguration->getName(); + const auto &partitionName = partitionConfiguration->getName(); // Getting list of replicas in the partition - const auto& replicas = partitionConfiguration->getReplicas(); + const auto &replicas = partitionConfiguration->getReplicas(); // Filling replica set for (configuration::Replica::replicaIndex_t replicaIndex = 0; replicaIndex < replicas.size(); replicaIndex++) @@ -146,14 +143,14 @@ class Coordinator final : public Base } // Iterating through input edges to create a connection with replicas and peer coordinators on that input - for (const auto& edge : _inputEdges) + for (const auto &edge : _inputEdges) { - const auto edgeIdx = edge.index; - const auto& edgeConfig = edge.config; - const auto& edgeName = edgeConfig->getName(); - const auto producerPartitionIdx = edge.producerPartitionIndex; - const auto consumerPartitionIdx = edge.consumerPartitionIndex; - + const auto edgeIdx = edge.index; + const auto &edgeConfig = edge.config; + const auto &edgeName = edgeConfig->getName(); + const auto producerPartitionIdx = edge.producerPartitionIndex; + const auto consumerPartitionIdx = edge.consumerPartitionIndex; + // Defining edge type, based on whether we expect this data from another coordinator or from the request manager auto edgeType = edgeConfig->isPromptEdge() ? edge::edgeType_t::requestManagerToCoordinator : edge::edgeType_t::coordinatorToCoordinator; @@ -161,41 +158,42 @@ class Coordinator final : public Base _partitionDataInputs.push_back(std::make_shared(*edgeConfig, edgeType, edgeIdx, producerPartitionIdx, consumerPartitionIdx, edge::Base::coordinatorReplicaIndex)); // if (edgeConfig->isPromptEdge()) printf("[Coordinator] Prompt Input Edge: Type: %u, EdgeIdx: %lu, CP: %lu, PP: %lu, RI: %lu\n", edgeType, edgeIdx, producerPartitionIdx, consumerPartitionIdx, edge::Base::coordinatorReplicaIndex); - } + } // Iterating through output edges to create a connection with replicas and peer coordinators on that output - for (const auto& edge : _outputEdges) + for (const auto &edge : _outputEdges) { - const auto edgeIdx = edge.index; - const auto& edgeConfig = edge.config; - const auto& edgeName = edgeConfig->getName(); - const auto producerPartitionIdx = edge.producerPartitionIndex; - const auto consumerPartitionIdx = edge.consumerPartitionIndex; + const auto edgeIdx = edge.index; + const auto &edgeConfig = edge.config; + const auto &edgeName = edgeConfig->getName(); + const auto producerPartitionIdx = edge.producerPartitionIndex; + const auto consumerPartitionIdx = edge.consumerPartitionIndex; - // Defining edge type, based on whether we expect to push this data to another coordinator or to the request manager - auto edgeType = edgeConfig->isResultEdge() ? edge::edgeType_t::coordinatorToRequestManager : edge::edgeType_t::coordinatorToCoordinator; + // Defining edge type, based on whether we expect to push this data to another coordinator or to the request manager + auto edgeType = edgeConfig->isResultEdge() ? edge::edgeType_t::coordinatorToRequestManager : edge::edgeType_t::coordinatorToCoordinator; // Create the output edge to pass this information to the receiving partition - _partitionDataOutputs.push_back(std::make_shared(*edgeConfig, edgeType, edgeIdx, producerPartitionIdx, consumerPartitionIdx, edge::Base::coordinatorReplicaIndex)); + _partitionDataOutputs.push_back( + std::make_shared(*edgeConfig, edgeType, edgeIdx, producerPartitionIdx, consumerPartitionIdx, edge::Base::coordinatorReplicaIndex)); // if (edgeConfig->isResultEdge()) printf("[Coordinator] Result Output Edge: Type: %u, EdgeIdx: %lu, CP: %lu, PP: %lu, RI: %lu\n", edgeType, edgeIdx, producerPartitionIdx, consumerPartitionIdx, edge::Base::coordinatorReplicaIndex); - } + } } // Gets the memory slots required by the edges - __INLINE__ void getMemorySlotsToExchange(std::vector& memorySlots) + __INLINE__ void getMemorySlotsToExchange(std::vector &memorySlots) { - for (const auto& edge : _partitionDataInputs) edge->getMemorySlotsToExchange(memorySlots); - for (const auto& edge : _partitionDataOutputs) edge->getMemorySlotsToExchange(memorySlots); - for (const auto& replica : _replicaMap) replica.second->getMemorySlotsToExchange(memorySlots); + for (const auto &edge : _partitionDataInputs) edge->getMemorySlotsToExchange(memorySlots); + for (const auto &edge : _partitionDataOutputs) edge->getMemorySlotsToExchange(memorySlots); + for (const auto &replica : _replicaMap) replica.second->getMemorySlotsToExchange(memorySlots); } /// This function completes the initialization of the edges, after the memory slot exchanges are completed __INLINE__ void initializeEdges(const HiCR::GlobalMemorySlot::tag_t tag) { - for (const auto& edge : _partitionDataInputs) edge->initialize(tag); - for (const auto& edge : _partitionDataOutputs) edge->initialize(tag); - for (const auto& replica : _replicaMap) replica.second->initializeEdges(tag); + for (const auto &edge : _partitionDataInputs) edge->initialize(tag); + for (const auto &edge : _partitionDataOutputs) edge->initialize(tag); + for (const auto &replica : _replicaMap) replica.second->initializeEdges(tag); } private: @@ -204,40 +202,45 @@ class Coordinator final : public Base __INLINE__ void initializeImpl() override { // Get my partition configuration - const auto& partitionConfiguration = _deployment.getPartitions()[_partitionIdx]; + const auto &partitionConfiguration = _deployment.getPartitions()[_partitionIdx]; // Get my partition name - const auto& partitionName = partitionConfiguration->getName(); + const auto &partitionName = partitionConfiguration->getName(); // Welcome message // printf("Initializing Partition Coordinator Index %lu - Name: %s - %lu Consumer / %lu Producer edges...\n", _partitionIdx, partitionName.c_str(), _partitionDataInputs.size(), _partitionDataOutputs.size()); // Subscribing to the heartbeat sending service for my replicas - for (const auto& replica : _replicaMap) subscribeHeartbeatEdge(replica.second->getControlOutput()); + for (const auto &replica : _replicaMap) subscribeHeartbeatEdge(replica.second->getControlOutput()); // Subscribing control edges to the message service for my replicas's heartbeat messages - for (const auto& replica : _replicaMap) - subscribeEdgeMessageHandler(hLLM::Role::edgeHandlerSubscription_t { hLLM::messages::messageTypes::heartbeat, - replica.second->getControlInput(), - [this](const std::shared_ptr edge, const hLLM::edge::Message& message){ heartbeatMessageHandler(edge, std::make_shared(message)); } }); + for (const auto &replica : _replicaMap) + subscribeEdgeMessageHandler(hLLM::Role::edgeHandlerSubscription_t{ + hLLM::messages::messageTypes::heartbeat, replica.second->getControlInput(), [this](const std::shared_ptr edge, const hLLM::edge::Message &message) { + heartbeatMessageHandler(edge, std::make_shared(message)); + }}); // Subscribing control edges to the message service for my replicas's ready message - for (const auto& replica : _replicaMap) - subscribeEdgeMessageHandler(hLLM::Role::edgeHandlerSubscription_t { hLLM::messages::messageTypes::replicaReady, - replica.second->getControlInput(), - [this](const std::shared_ptr edge, const hLLM::edge::Message& message){ replicaReadyMessageHandler(edge, std::make_shared(message)); } }); + for (const auto &replica : _replicaMap) + subscribeEdgeMessageHandler(hLLM::Role::edgeHandlerSubscription_t{ + hLLM::messages::messageTypes::replicaReady, replica.second->getControlInput(), [this](const std::shared_ptr edge, const hLLM::edge::Message &message) { + replicaReadyMessageHandler(edge, std::make_shared(message)); + }}); // Subscribing data input edges to the message service for my replicas results (outputs) - for (const auto& replica : _replicaMap) for (const auto& dataEdge : replica.second->getDataInputs()) - subscribeEdgeMessageHandler(hLLM::Role::edgeHandlerSubscription_t { hLLM::messages::messageTypes::data, - dataEdge, - [this](const std::shared_ptr edge, const hLLM::edge::Message& message){ outputDataMessageHandler(edge, std::make_shared(message)); } }); + for (const auto &replica : _replicaMap) + for (const auto &dataEdge : replica.second->getDataInputs()) + subscribeEdgeMessageHandler( + hLLM::Role::edgeHandlerSubscription_t{hLLM::messages::messageTypes::data, dataEdge, [this](const std::shared_ptr edge, const hLLM::edge::Message &message) { + outputDataMessageHandler(edge, std::make_shared(message)); + }}); // Subscribing data input edges to the message service for my peers - for (const auto& dataEdge : _partitionDataInputs) - subscribeEdgeMessageHandler(hLLM::Role::edgeHandlerSubscription_t { hLLM::messages::messageTypes::data, - dataEdge, - [this](const std::shared_ptr edge, const hLLM::edge::Message& message){ inputDataMessageHandler(edge, std::make_shared(message)); } }); + for (const auto &dataEdge : _partitionDataInputs) + subscribeEdgeMessageHandler( + hLLM::Role::edgeHandlerSubscription_t{hLLM::messages::messageTypes::data, dataEdge, [this](const std::shared_ptr edge, const hLLM::edge::Message &message) { + inputDataMessageHandler(edge, std::make_shared(message)); + }}); // Registering service for job management _taskr->addService(&_taskrJobManagementService); @@ -246,7 +249,7 @@ class Coordinator final : public Base __INLINE__ void heartbeatMessageHandler(const std::shared_ptr edge, const std::shared_ptr message) { const auto replicaIdx = edge->getReplicaIndex(); - if(_deployment.getHeartbeat().visible == true) printf("[Coordinator %lu] Received heartbeat from replica %lu.\n", _partitionIdx, replicaIdx); + if (_deployment.getHeartbeat().visible == true) printf("[Coordinator %lu] Received heartbeat from replica %lu.\n", _partitionIdx, replicaIdx); } __INLINE__ void replicaReadyMessageHandler(const std::shared_ptr edge, const std::shared_ptr message) @@ -260,16 +263,15 @@ class Coordinator final : public Base _replicaQueueMutex.unlock(); } - __INLINE__ void inputDataMessageHandler(const std::shared_ptr edge, const std::shared_ptr message) { // Getting prompt id from data const auto promptId = message->getPromptId(); - const auto data = message->getData(); - const auto size = message->getSize(); - const auto edgeIdx = edge->getEdgeIndex(); - const auto edgePos = _edgeIndexToVectorPositionMap[edgeIdx]; - + const auto data = message->getData(); + const auto size = message->getSize(); + const auto edgeIdx = edge->getEdgeIndex(); + const auto edgePos = _edgeIndexToVectorPositionMap[edgeIdx]; + // printf("[Coordinator %lu] Received data for prompt %lu/%lu, edge '%s'.\n", _partitionIdx, promptId.first, promptId.second, edge->getEdgeConfig().getName().c_str()); // Pointer to the job object @@ -278,7 +280,7 @@ class Coordinator final : public Base // Check if there exists already a job for this incoming data // If it does not exist, create a new one. Otherwise, take it from the map _jobMapMutex.lock(); - if(_jobMap.contains(promptId) == false) + if (_jobMap.contains(promptId) == false) { // Creating new job entry job = std::make_shared(promptId, _inputEdges, _outputEdges); @@ -290,14 +292,15 @@ class Coordinator final : public Base _pendingJobQueueMutex.lock(); _pendingJobQueue.push(job); _pendingJobQueueMutex.unlock(); - } + } // Otherwise, it exists so grab it from the job map - else job = _jobMap.at(promptId); + else + job = _jobMap.at(promptId); _jobMapMutex.unlock(); - + // Getting the input that is satisfied by this message - auto& input = job->getInputEdges()[edgePos]; + auto &input = job->getInputEdges()[edgePos]; // Making a copy of the data into the edge buffer -- we don't do it by referece because we want to free up the input channels immediately to avoid deadlocks input.storeDataByCopy(data, size); @@ -310,41 +313,46 @@ class Coordinator final : public Base { // Getting prompt id from data const auto promptId = message->getPromptId(); - const auto data = message->getData(); - const auto size = message->getSize(); - const auto edgeIdx = edge->getEdgeIndex(); - const auto edgePos = _edgeIndexToVectorPositionMap[edgeIdx]; + const auto data = message->getData(); + const auto size = message->getSize(); + const auto edgeIdx = edge->getEdgeIndex(); + const auto edgePos = _edgeIndexToVectorPositionMap[edgeIdx]; // printf("[Coordinator %lu] Received output data from replica %lu for prompt %lu/%lu, edge '%s'.\n", _partitionIdx, replicaIdx, promptId.first, promptId.second, edge->getEdgeConfig().getName().c_str()); // Check if there exists already a job for this incoming output data _jobMapMutex.lock(); - if(_jobMap.contains(promptId) == false) HICR_THROW_RUNTIME("The prompt id corresponding to the output data does not exist. This must be a bug in hLLM\n"); - + if (_jobMap.contains(promptId) == false) HICR_THROW_RUNTIME("The prompt id corresponding to the output data does not exist. This must be a bug in hLLM\n"); + // Otherwise, it exists so grab it from the job map - auto& job = _jobMap.at(promptId); + auto &job = _jobMap.at(promptId); _jobMapMutex.unlock(); // Forward messages directly to the next peer - const auto forwardMessage = messages::Data(data, size, promptId); - const auto& peerOutput = _partitionDataOutputs[edgePos]; + const auto forwardMessage = messages::Data(data, size, promptId); + const auto &peerOutput = _partitionDataOutputs[edgePos]; // Encoding message const auto rawMessage = forwardMessage.encode(); - + // printf("[Coordinator %lu] Pushing output data for prompt %lu/%lu, edge '%s' (index: %lu, pos: %lu) to Coordinator %lu.\n", _partitionIdx, promptId.first, promptId.second, peerOutput->getEdgeConfig().getName().c_str(), edgeIdx, edgePos, peerOutput->getConsumerPartitionIndex()); // Send message only when the peer is ready peerOutput->pushMessageLocking(rawMessage); // Getting the input that is satisfied by this message - auto& output = job->getOutputEdges()[edgePos]; + auto &output = job->getOutputEdges()[edgePos]; // Setting edge as satisfied output.setSatisfied(); - // Checking if the job is ready to be removed - bool isJobFinished = true; - for (const auto& output : job->getOutputEdges()) if (output.isSatisfied() == false) { isJobFinished = false; break; }; + // Checking if the job is ready to be removed + bool isJobFinished = true; + for (const auto &output : job->getOutputEdges()) + if (output.isSatisfied() == false) + { + isJobFinished = false; + break; + }; // If it's finished, do the following if (isJobFinished) @@ -371,7 +379,7 @@ class Coordinator final : public Base { job = _pendingJobQueue.front(); _pendingJobQueue.pop(); - } + } _pendingJobQueueMutex.unlock(); // If no jobs were in the queue, simply return @@ -382,7 +390,12 @@ class Coordinator final : public Base // Checking if the job is ready to be sent to a replica bool isJobReady = true; - for (const auto& input : job->getInputEdges()) if (input.isSatisfied() == false) { isJobReady = false; break; }; + for (const auto &input : job->getInputEdges()) + if (input.isSatisfied() == false) + { + isJobReady = false; + break; + }; // If the job is ready to go, check if there is a ready replica to take on this job std::shared_ptr replica = nullptr; @@ -399,21 +412,21 @@ class Coordinator final : public Base } // If the job is ready to go, try to send it to one of the replicas - if (replica != nullptr) + if (replica != nullptr) { // Now we have a ready job and a ready replica, sending the job to the replica // printf("Sending job for prompt %lu/%lu to replica %lu\n", promptId.first, promptId.second, replica->getReplicaIdx()); - + // For each of the edges, push the data through the replica's channels for (size_t edgePos = 0; edgePos < _partitionDataInputs.size(); edgePos++) { // Getting corresponding edges - auto& inputEdge = job->getInputEdges()[edgePos]; - const auto& outputEdge = replica->getDataOutputs()[edgePos]; + auto &inputEdge = job->getInputEdges()[edgePos]; + const auto &outputEdge = replica->getDataOutputs()[edgePos]; // Creating message - const auto& dataSlot = inputEdge.getDataSlot(); - const auto message = messages::Data((const uint8_t*)dataSlot->getPointer(), dataSlot->getSize(), promptId); + const auto &dataSlot = inputEdge.getDataSlot(); + const auto message = messages::Data((const uint8_t *)dataSlot->getPointer(), dataSlot->getSize(), promptId); // Encoding message const auto rawMessage = message.encode(); @@ -434,28 +447,28 @@ class Coordinator final : public Base _pendingJobQueue.push(job); _pendingJobQueueMutex.unlock(); } - taskr::Service::serviceFc_t _jobManagementServiceFunction = [this](){ this->jobManagementService(); }; - taskr::Service _taskrJobManagementService = taskr::Service(_jobManagementServiceFunction, 0); + taskr::Service::serviceFc_t _jobManagementServiceFunction = [this]() { this->jobManagementService(); }; + taskr::Service _taskrJobManagementService = taskr::Service(_jobManagementServiceFunction, 0); // Container for partition replica objects - std::map> _replicaMap; + std::map> _replicaMap; // Mutual exclusion mechanism to access the ready replica queue - std::mutex _replicaQueueMutex; + std::mutex _replicaQueueMutex; std::queue> _replicaQueue; // Data Input / Output edges from other partition coordinators - std::vector> _partitionDataInputs; + std::vector> _partitionDataInputs; std::vector> _partitionDataOutputs; // Mutual exclusion mechanism to access the pending job queue - std::mutex _pendingJobQueueMutex; + std::mutex _pendingJobQueueMutex; std::queue> _pendingJobQueue; // Map of jobs, indexed by prompt id - std::mutex _jobMapMutex; + std::mutex _jobMapMutex; std::map> _jobMap; }; // class Coordinator -} // namespace hLLM::coordinator \ No newline at end of file +} // namespace hLLM::roles::partition \ No newline at end of file diff --git a/include/hllm/roles/partition/replica.hpp b/include/hllm/roles/partition/replica.hpp index e7e4ff8..2dce52a 100644 --- a/include/hllm/roles/partition/replica.hpp +++ b/include/hllm/roles/partition/replica.hpp @@ -24,41 +24,42 @@ class Replica final : public Base Replica() = delete; - Replica( - const configuration::Deployment deployment, - const configuration::Partition::partitionIndex_t partitionIdx, - const configuration::Replica::replicaIndex_t replicaIdx, - taskr::Runtime* const taskr, - const std::map& registeredFunctions - ) : Base(deployment, partitionIdx, taskr), - _replicaIdx(replicaIdx), - _registeredFunctions(registeredFunctions) + Replica(const configuration::Deployment deployment, + const configuration::Partition::partitionIndex_t partitionIdx, + const configuration::Replica::replicaIndex_t replicaIdx, + taskr::Runtime *const taskr, + const std::map ®isteredFunctions) + : Base(deployment, partitionIdx, taskr), + _replicaIdx(replicaIdx), + _registeredFunctions(registeredFunctions) { // Get my partition configuration - const auto& partitionConfiguration = _deployment.getPartitions()[_partitionIdx]; + const auto &partitionConfiguration = _deployment.getPartitions()[_partitionIdx]; // Get my partition name - const auto& partitionName = partitionConfiguration->getName(); + const auto &partitionName = partitionConfiguration->getName(); // Iterating through input edges to create a connection with the coordinator on that edge - for (const auto& edge : _inputEdges) + for (const auto &edge : _inputEdges) { - const auto edgeIdx = edge.index; - const auto& edgeConfig = edge.config; + const auto edgeIdx = edge.index; + const auto &edgeConfig = edge.config; _coordinatorDataInputs.push_back(std::make_shared(*edgeConfig, edge::edgeType_t::coordinatorToReplica, edgeIdx, _partitionIdx, _partitionIdx, _replicaIdx)); } - + // Iterating through output edges to create a connection with the coordinator on that edge - for (const auto& edge : _outputEdges) + for (const auto &edge : _outputEdges) { - const auto edgeIdx = edge.index; - const auto& edgeConfig = edge.config; + const auto edgeIdx = edge.index; + const auto &edgeConfig = edge.config; _coordinatorDataOutputs.push_back(std::make_shared(*edgeConfig, edge::edgeType_t::replicaToCoordinator, edgeIdx, _partitionIdx, _partitionIdx, _replicaIdx)); } // Create Control edges with my partition coordinator - _coordinatorControlInput = std::make_shared(*_controlEdgeConfig, edge::edgeType_t::coordinatorToReplica, edge::Base::controlEdgeIndex, _partitionIdx, _partitionIdx, _replicaIdx); - _coordinatorControlOutput = std::make_shared(*_controlEdgeConfig, edge::edgeType_t::replicaToCoordinator, edge::Base::controlEdgeIndex, _partitionIdx, _partitionIdx, _replicaIdx); + _coordinatorControlInput = + std::make_shared(*_controlEdgeConfig, edge::edgeType_t::coordinatorToReplica, edge::Base::controlEdgeIndex, _partitionIdx, _partitionIdx, _replicaIdx); + _coordinatorControlOutput = + std::make_shared(*_controlEdgeConfig, edge::edgeType_t::replicaToCoordinator, edge::Base::controlEdgeIndex, _partitionIdx, _partitionIdx, _replicaIdx); // Creating general TaskR function for all execution graph tasks _taskrFunction = std::make_unique([this](taskr::Task *task) { runTaskRFunction(task); }); @@ -70,25 +71,31 @@ class Replica final : public Base const auto &tasks = partitionConfiguration->getTasks(); // Calculating, for each of this partition's tasks, what are the edge indexes that correspond to their inputs - for (const auto& task : tasks) - for (const auto& taskInput : task->getInputs()) + for (const auto &task : tasks) + for (const auto &taskInput : task->getInputs()) for (size_t edgePos = 0; edgePos < _inputEdges.size(); edgePos++) if (taskInput == _inputEdges[edgePos].config->getName()) - { _taskInputEdgePositions[task->getFunctionName()].push_back(edgePos); break; } + { + _taskInputEdgePositions[task->getFunctionName()].push_back(edgePos); + break; + } // Similarly, for each edge, store which tasks need to be notified of their arrival - for (const auto& input : _inputEdges) - for (const auto& task : tasks) - for (const auto& taskInput : task->getInputs()) - { - const auto& inputName = input.config->getName(); - if (taskInput == inputName) - { _inputEdgeTaskDependencies[inputName].push_back(task->getFunctionName()); break; } - } + for (const auto &input : _inputEdges) + for (const auto &task : tasks) + for (const auto &taskInput : task->getInputs()) + { + const auto &inputName = input.config->getName(); + if (taskInput == inputName) + { + _inputEdgeTaskDependencies[inputName].push_back(task->getFunctionName()); + break; + } + } // Calculating, for each of this partition's tasks, what are the edge indexes that correspond to their outputs - for (const auto& task : tasks) - for (const auto& taskOutput : task->getOutputs()) + for (const auto &task : tasks) + for (const auto &taskOutput : task->getOutputs()) { bool foundEdge = false; printf("Looking for task output: %s\n", taskOutput.c_str()); @@ -96,14 +103,15 @@ class Replica final : public Base // Finding the edge corresponding to this task output for (size_t edgePos = 0; edgePos < _outputEdges.size(); edgePos++) if (taskOutput == _outputEdges[edgePos].config->getName()) - { - _taskOutputEdgePositions[taskOutput] = edgePos; - foundEdge = true; - break; - } - - // Sanity check - if (foundEdge == false) HICR_THROW_RUNTIME("[Replica %lu / %lu] Could not find the edge for output: %s. This must be a bug in hLLM", _partitionIdx, _replicaIdx, taskOutput.c_str()); + { + _taskOutputEdgePositions[taskOutput] = edgePos; + foundEdge = true; + break; + } + + // Sanity check + if (foundEdge == false) + HICR_THROW_RUNTIME("[Replica %lu / %lu] Could not find the edge for output: %s. This must be a bug in hLLM", _partitionIdx, _replicaIdx, taskOutput.c_str()); } } @@ -112,29 +120,29 @@ class Replica final : public Base /// This function completes the initialization of the edges, after the memory slot exchanges are completed __INLINE__ void initializeEdges(const HiCR::GlobalMemorySlot::tag_t tag) { - for (const auto& edge : _coordinatorDataInputs) edge->initialize(tag); - for (const auto& edge : _coordinatorDataOutputs) edge->initialize(tag); + for (const auto &edge : _coordinatorDataInputs) edge->initialize(tag); + for (const auto &edge : _coordinatorDataOutputs) edge->initialize(tag); _coordinatorControlInput->initialize(tag); _coordinatorControlOutput->initialize(tag); } - __INLINE__ void getMemorySlotsToExchange(std::vector& memorySlots) + __INLINE__ void getMemorySlotsToExchange(std::vector &memorySlots) { - for (const auto& edge : _coordinatorDataInputs) edge->getMemorySlotsToExchange(memorySlots); - for (const auto& edge : _coordinatorDataOutputs) edge->getMemorySlotsToExchange(memorySlots); + for (const auto &edge : _coordinatorDataInputs) edge->getMemorySlotsToExchange(memorySlots); + for (const auto &edge : _coordinatorDataOutputs) edge->getMemorySlotsToExchange(memorySlots); _coordinatorControlInput->getMemorySlotsToExchange(memorySlots); _coordinatorControlOutput->getMemorySlotsToExchange(memorySlots); } - private: - + private: + __INLINE__ void runTaskRFunction(taskr::Task *taskrTask) { const auto &taskId = taskrTask->getTaskId(); const auto &task = _taskLabelMap.at(taskId); const auto &taskConfig = task->getConfig(); const auto &function = task->getFunction(); - const auto promptId = _activeJob->getPromptId(); + const auto promptId = _activeJob->getPromptId(); // Setting tasks's partition and replica idxs task->setPartitionIdx(_partitionIdx); @@ -142,15 +150,16 @@ class Replica final : public Base task->setPromptId(promptId); // Setting the edges that the task needs to send outputs through - for (const auto& output : taskConfig.getOutputs()) + for (const auto &output : taskConfig.getOutputs()) { // Getting edge position corresponding to the task output - if (_taskOutputEdgePositions.contains(output) == false) HICR_THROW_RUNTIME("[Replica %lu / %lu] Could not find output: %s's position...", _partitionIdx, _replicaIdx, output.c_str()); + if (_taskOutputEdgePositions.contains(output) == false) + HICR_THROW_RUNTIME("[Replica %lu / %lu] Could not find output: %s's position...", _partitionIdx, _replicaIdx, output.c_str()); const auto outputEdgePos = _taskOutputEdgePositions.at(output); // Setting output edge task->setOutputEdge(output, _coordinatorDataOutputs[outputEdgePos]); - } + } // Actually run the function now function(task); @@ -162,15 +171,16 @@ class Replica final : public Base _jobManagementMutex.lock(); // Once the task has finished, mark all its outputs as finished in the job's output set - for (const auto& output : taskConfig.getOutputs()) + for (const auto &output : taskConfig.getOutputs()) { // Getting edge position corresponding to the task output - if (_taskOutputEdgePositions.contains(output) == false) HICR_THROW_RUNTIME("[Replica %lu / %lu] Could not find output: %s's position...", _partitionIdx, _replicaIdx, output.c_str()); + if (_taskOutputEdgePositions.contains(output) == false) + HICR_THROW_RUNTIME("[Replica %lu / %lu] Could not find output: %s's position...", _partitionIdx, _replicaIdx, output.c_str()); const auto outputEdgePos = _taskOutputEdgePositions.at(output); // Getting corresponding output edge // printf("[Replica %lu / %lu] Pushing Output: %s (Pos: %lu)...\n", _partitionIdx, _replicaIdx, output.c_str(), outputEdgePos); - auto& outputEdge = _activeJob->getOutputEdges()[outputEdgePos]; + auto &outputEdge = _activeJob->getOutputEdges()[outputEdgePos]; // Getting output from task outputEdge.setSatisfied(); @@ -181,7 +191,8 @@ class Replica final : public Base { // If any of the outputs is not yet satisfied, then the job is not finished bool isJobFinished = true; - for (const auto& output : _activeJob->getOutputEdges()) if (output.isSatisfied() == false) isJobFinished = false; + for (const auto &output : _activeJob->getOutputEdges()) + if (output.isSatisfied() == false) isJobFinished = false; // If it is finished, remove it as active job if (isJobFinished == true) @@ -191,11 +202,11 @@ class Replica final : public Base // Now send the coordinator the signal that we're again ready hLLM::messages::ReplicaReady message; - auto rawMessage = message.encode(); + auto rawMessage = message.encode(); // Pushing message _coordinatorControlOutput->pushMessageLocking(rawMessage); - } + } } _jobManagementMutex.unlock(); @@ -213,30 +224,32 @@ class Replica final : public Base subscribeHeartbeatEdge(_coordinatorControlOutput); // Subscribing control edges to the message service for my replicas - subscribeEdgeMessageHandler(hLLM::Role::edgeHandlerSubscription_t { hLLM::messages::messageTypes::heartbeat, - _coordinatorControlInput, - [this](const std::shared_ptr edge, const hLLM::edge::Message& message){ heartbeatMessageHandler(edge, std::make_shared(message)); } }); + subscribeEdgeMessageHandler(hLLM::Role::edgeHandlerSubscription_t{ + hLLM::messages::messageTypes::heartbeat, _coordinatorControlInput, [this](const std::shared_ptr edge, const hLLM::edge::Message &message) { + heartbeatMessageHandler(edge, std::make_shared(message)); + }}); // Subscribing control edges to the message service for my replicas - for (const auto& dataEdge : _coordinatorDataInputs) - subscribeEdgeMessageHandler(hLLM::Role::edgeHandlerSubscription_t { hLLM::messages::messageTypes::data, - dataEdge, - [this](const std::shared_ptr edge, const hLLM::edge::Message& message){ inputDataMessageHandler(edge, std::make_shared(message)); } }); + for (const auto &dataEdge : _coordinatorDataInputs) + subscribeEdgeMessageHandler( + hLLM::Role::edgeHandlerSubscription_t{hLLM::messages::messageTypes::data, dataEdge, [this](const std::shared_ptr edge, const hLLM::edge::Message &message) { + inputDataMessageHandler(edge, std::make_shared(message)); + }}); } void heartbeatMessageHandler(const std::shared_ptr edge, const std::shared_ptr message) { - if(_deployment.getHeartbeat().visible == true) printf("[Replica %lu / %lu] Received heartbeat from coordinator.\n", _partitionIdx, _replicaIdx); + if (_deployment.getHeartbeat().visible == true) printf("[Replica %lu / %lu] Received heartbeat from coordinator.\n", _partitionIdx, _replicaIdx); } __INLINE__ void inputDataMessageHandler(const std::shared_ptr edge, const std::shared_ptr message) { // Getting prompt id from data const auto promptId = message->getPromptId(); - const auto data = message->getData(); - const auto size = message->getSize(); - const auto edgeIdx = edge->getEdgeIndex(); - const auto edgePos = _edgeIndexToVectorPositionMap[edgeIdx]; + const auto data = message->getData(); + const auto size = message->getSize(); + const auto edgeIdx = edge->getEdgeIndex(); + const auto edgePos = _edgeIndexToVectorPositionMap[edgeIdx]; // Preventing concurrent access std::lock_guard lockGuard(_jobManagementMutex); @@ -244,8 +257,16 @@ class Replica final : public Base // printf("[Replica %lu/%lu] Received data for prompt %lu/%lu, edge '%s'.\n", _partitionIdx, _replicaIdx, promptId.first, promptId.second, edge->getEdgeConfig().getName().c_str()); // If there is a current job assigned to this replica and the job corresponds to a different prompt, then fail - if (_activeJob != nullptr) if (promptId != _activeJob->getPromptId()) - HICR_THROW_RUNTIME("[Replica %lu/%lu] Received data for prompt %lu/%lu, edge '%s' but currently prompt %lu/%lu is running.\n", _partitionIdx, _replicaIdx, promptId.first, promptId.second, edge->getEdgeConfig().getName().c_str(), _activeJob->getPromptId().first, _activeJob->getPromptId().second); + if (_activeJob != nullptr) + if (promptId != _activeJob->getPromptId()) + HICR_THROW_RUNTIME("[Replica %lu/%lu] Received data for prompt %lu/%lu, edge '%s' but currently prompt %lu/%lu is running.\n", + _partitionIdx, + _replicaIdx, + promptId.first, + promptId.second, + edge->getEdgeConfig().getName().c_str(), + _activeJob->getPromptId().first, + _activeJob->getPromptId().second); // If there is no current active job if (_activeJob == nullptr) @@ -255,28 +276,28 @@ class Replica final : public Base // And set it in motion startJob(_activeJob); - } + } // Getting input corresponding to the message that arrived - auto& input = _activeJob->getInputEdges()[edgePos]; - const auto& inputName = input.getEdgeInfo().config->getName(); - const auto& inputData = input.getDataSlot(); + auto &input = _activeJob->getInputEdges()[edgePos]; + const auto &inputName = input.getEdgeInfo().config->getName(); + const auto &inputData = input.getDataSlot(); // Store a reference to the provided data into the edge (no extra copies required) input.storeDataByReference(data, size); // Assigning all interested tasks the input data - for (const auto& task : _inputEdgeTaskDependencies[inputName]) _taskFunctionNameMap[task]->setInput(inputName, inputData); + for (const auto &task : _inputEdgeTaskDependencies[inputName]) _taskFunctionNameMap[task]->setInput(inputName, inputData); // Marking input as satisfied input.setSatisfied(); } - __INLINE__ void startJob(std::shared_ptr& job) + __INLINE__ void startJob(std::shared_ptr &job) { // Get my partition configuration - const auto& partitionConfiguration = _deployment.getPartitions()[_partitionIdx]; - const auto &tasks = partitionConfiguration->getTasks(); + const auto &partitionConfiguration = _deployment.getPartitions()[_partitionIdx]; + const auto &tasks = partitionConfiguration->getTasks(); // Clearing previous job's maps _taskLabelMap.clear(); @@ -287,7 +308,8 @@ class Replica final : public Base { // Checking the requested function was registered const auto taskFunctionName = task->getFunctionName(); - if (_registeredFunctions.contains(taskFunctionName) == false) HICR_THROW_LOGIC("The requested function name '%s' is not registered. Please register it before running the hLLM.", taskFunctionName.c_str()); + if (_registeredFunctions.contains(taskFunctionName) == false) + HICR_THROW_LOGIC("The requested function name '%s' is not registered. Please register it before running the hLLM.", taskFunctionName.c_str()); // Getting label for taskr function const auto taskId = _taskrLabelCounter++; @@ -299,12 +321,13 @@ class Replica final : public Base auto newTask = new hLLM::Task(*task, fc, taskId, _taskrFunction.get()); // Get ahold of the task dependency edge positions for dependency checking - const auto& taskInputEdgePositions = _taskInputEdgePositions[taskFunctionName]; + const auto &taskInputEdgePositions = _taskInputEdgePositions[taskFunctionName]; // Function to check the tasks inputs are present before executing it auto taskInputsCheck = [&]() { // If there is an active job, check whether the inputs for this tasks are satisfied within it - for (const auto edgePos : taskInputEdgePositions) if (_activeJob->getInputEdges()[edgePos].isSatisfied() == false) return false; + for (const auto edgePos : taskInputEdgePositions) + if (_activeJob->getInputEdges()[edgePos].isSatisfied() == false) return false; // All dependencies are satisfied, enable this task for execution return true; @@ -321,23 +344,23 @@ class Replica final : public Base } // Adding task <-> task dependencies - for (const auto &task : tasks) + for (const auto &task : tasks) { - const auto& dependentTaskName = task->getFunctionName(); - const auto& dependentTask = _taskFunctionNameMap[dependentTaskName]; + const auto &dependentTaskName = task->getFunctionName(); + const auto &dependentTask = _taskFunctionNameMap[dependentTaskName]; - for (const auto& dependedTaskName : task->getDependencies()) + for (const auto &dependedTaskName : task->getDependencies()) { - const auto& dependedTask = _taskFunctionNameMap[dependedTaskName]; + const auto &dependedTask = _taskFunctionNameMap[dependedTaskName]; dependentTask->addDependency(dependedTask); - } + } } // Adding tasks to TaskR itself - for (const auto &task : tasks) + for (const auto &task : tasks) { - const auto& taskName = task->getFunctionName(); - const auto& taskObject = _taskFunctionNameMap[taskName]; + const auto &taskName = task->getFunctionName(); + const auto &taskObject = _taskFunctionNameMap[taskName]; _taskr->addTask(taskObject); } } @@ -346,11 +369,11 @@ class Replica final : public Base const configuration::Replica::replicaIndex_t _replicaIdx; // Data Input/Output edges from/to the coordinator - std::vector> _coordinatorDataInputs; + std::vector> _coordinatorDataInputs; std::vector> _coordinatorDataOutputs; // Control Input/Output edges from/to the coordinator - std::shared_ptr _coordinatorControlInput; + std::shared_ptr _coordinatorControlInput; std::shared_ptr _coordinatorControlOutput; // Map relating task function names to their input edge positions (for data dependency checking) @@ -363,10 +386,10 @@ class Replica final : public Base std::map> _inputEdgeTaskDependencies; // Map relating task ids to their hLLM task - std::map _taskLabelMap; + std::map _taskLabelMap; // Map relating task function names to their hLLM task - std::map _taskFunctionNameMap; + std::map _taskFunctionNameMap; // The set of registered functions to use as targets for tasks const std::map _registeredFunctions; @@ -384,4 +407,4 @@ class Replica final : public Base std::mutex _jobManagementMutex; }; // class Replica -} // namespace hLLM::replica \ No newline at end of file +} // namespace hLLM::roles::partition \ No newline at end of file diff --git a/include/hllm/roles/requestManager.hpp b/include/hllm/roles/requestManager.hpp index 1b31041..def81d0 100644 --- a/include/hllm/roles/requestManager.hpp +++ b/include/hllm/roles/requestManager.hpp @@ -17,8 +17,8 @@ #include "../realTimeAnalysis.hpp" #include "../../../extern/cpp-httplib/httplib.h" -using Clock = std::chrono::steady_clock; // Monotonic clock for precise timing -using Secs = std::chrono::seconds; // Convenience alias for seconds +using Clock = std::chrono::steady_clock; // Monotonic clock for precise timing +using Secs = std::chrono::seconds; // Convenience alias for seconds namespace hLLM::roles { @@ -27,68 +27,72 @@ class RequestManager final : public hLLM::Role { public: - RequestManager() = delete; + RequestManager() = delete; ~RequestManager() = default; - RequestManager( - const configuration::Deployment deployment, - taskr::Runtime* const taskr - ) : Role(deployment, taskr), _rTA("0.0.0.0", 5003), _cli("localhost", 5003), num_responses(0), prev_num_responses(0) + RequestManager(const configuration::Deployment deployment, taskr::Runtime *const taskr) + : Role(deployment, taskr), + _rTA("0.0.0.0", 5003), + _cli("localhost", 5003), + num_responses(0), + prev_num_responses(0) { // Name of the prompt input - const auto& promptInputName = _deployment.getRequestManager()->getInput(); - const auto& resultOutputName = _deployment.getRequestManager()->getOutput(); + const auto &promptInputName = _deployment.getRequestManager()->getInput(); + const auto &resultOutputName = _deployment.getRequestManager()->getOutput(); // Getting partition list - const auto& partitions = _deployment.getPartitions(); + const auto &partitions = _deployment.getPartitions(); // Getting list of edges in the deployment - const auto& edgeConfigs = _deployment.getEdges(); + const auto &edgeConfigs = _deployment.getEdges(); - // Looking for the edge corresponding to a partition's prompt input + // Looking for the edge corresponding to a partition's prompt input for (size_t edgeIdx = 0; edgeIdx < edgeConfigs.size(); edgeIdx++) { - const auto& edgeConfig = edgeConfigs[edgeIdx]; - const auto& edgeName = edgeConfig->getName(); - + const auto &edgeConfig = edgeConfigs[edgeIdx]; + const auto &edgeName = edgeConfig->getName(); + // If this is the prompt input, then create the outgoing edge to the corresponding partition if (edgeConfig->isPromptEdge() == true) { // Looking for the partition who needs the prompt input for (configuration::Partition::partitionIndex_t idx = 0; idx < partitions.size(); idx++) { - const auto& partition = partitions[idx]; - for (const auto& task : partition->getTasks()) - for (const auto& input : task->getInputs()) + const auto &partition = partitions[idx]; + for (const auto &task : partition->getTasks()) + for (const auto &input : task->getInputs()) if (input == promptInputName) _promptConsumerPartitionIdx = idx; } // Creating the prompt sending edge - _promptOutputEdge = std::make_shared(*edgeConfig, edge::edgeType_t::requestManagerToCoordinator, edgeIdx, _promptConsumerPartitionIdx, _promptConsumerPartitionIdx, edge::Base::coordinatorReplicaIndex); + _promptOutputEdge = std::make_shared( + *edgeConfig, edge::edgeType_t::requestManagerToCoordinator, edgeIdx, _promptConsumerPartitionIdx, _promptConsumerPartitionIdx, edge::Base::coordinatorReplicaIndex); // printf("[Request Manager] Prompt Output Edge: Type: %u, EdgeIdx: %lu, CP: %lu, PP: %lu, RI: %lu\n", edge::edgeType_t::requestManagerToCoordinator, edgeIdx, _promptConsumerPartitionIdx, _promptConsumerPartitionIdx, edge::Base::coordinatorReplicaIndex); } - } + } - // Looking for the edge corresponding to a partition's prompt input + // Looking for the edge corresponding to a partition's prompt input for (size_t edgeIdx = 0; edgeIdx < edgeConfigs.size(); edgeIdx++) { - const auto& edgeConfig = edgeConfigs[edgeIdx]; - const auto& edgeName = edgeConfig->getName(); - + const auto &edgeConfig = edgeConfigs[edgeIdx]; + const auto &edgeName = edgeConfig->getName(); + // If this is the result output, then create the incoming edge from the corresponding partition if (edgeConfig->isResultEdge() == true) { // Looking for the partition who needs the prompt input for (configuration::Partition::partitionIndex_t idx = 0; idx < partitions.size(); idx++) { - const auto& partition = partitions[idx]; - for (const auto& task : partition->getTasks()) - for (const auto& output : task->getOutputs()) + const auto &partition = partitions[idx]; + for (const auto &task : partition->getTasks()) + for (const auto &output : task->getOutputs()) if (output == resultOutputName) _resultProducerPartitionIdx = idx; } // Creating the result-receiving edge - _resultInputEdge = std::make_shared(*edgeConfig, edge::edgeType_t::coordinatorToRequestManager, edgeIdx, _resultProducerPartitionIdx, _resultProducerPartitionIdx, edge::Base::coordinatorReplicaIndex); + _resultInputEdge = std::make_shared( + *edgeConfig, edge::edgeType_t::coordinatorToRequestManager, edgeIdx, _resultProducerPartitionIdx, _resultProducerPartitionIdx, edge::Base::coordinatorReplicaIndex); // printf("[Request Manager] Result Input Edge: Type: %u, EdgeIdx: %lu, CP: %lu, PP: %lu, RI: %lu\n", edge::edgeType_t::coordinatorToRequestManager, edgeIdx, _resultProducerPartitionIdx, _resultProducerPartitionIdx, edge::Base::coordinatorReplicaIndex); } } @@ -98,7 +102,7 @@ class RequestManager final : public hLLM::Role } // Gets the memory slots required by the edges - __INLINE__ void getMemorySlotsToExchange(std::vector& memorySlots) + __INLINE__ void getMemorySlotsToExchange(std::vector &memorySlots) { _promptOutputEdge->getMemorySlotsToExchange(memorySlots); _resultInputEdge->getMemorySlotsToExchange(memorySlots); @@ -133,7 +137,7 @@ class RequestManager final : public hLLM::Role _sessionManagementMutex.unlock(); // Wait until session is connected - while(session->isConnected() == false); + while (session->isConnected() == false); // Returning session return session; @@ -148,9 +152,10 @@ class RequestManager final : public hLLM::Role _taskr->addService(&_taskrPromptHandlingService); // Subscribing data input edges for the incoming prompt responses - subscribeEdgeMessageHandler(hLLM::Role::edgeHandlerSubscription_t { hLLM::messages::messageTypes::data, - _resultInputEdge, - [this](const std::shared_ptr edge, const hLLM::edge::Message& message){ responseDataMessageHandler(edge, std::make_shared(message)); } }); + subscribeEdgeMessageHandler(hLLM::Role::edgeHandlerSubscription_t{ + hLLM::messages::messageTypes::data, _resultInputEdge, [this](const std::shared_ptr edge, const hLLM::edge::Message &message) { + responseDataMessageHandler(edge, std::make_shared(message)); + }}); // Adding session management service _taskr->addService(&_taskrSessionManagementService); @@ -160,17 +165,18 @@ class RequestManager final : public hLLM::Role { // Getting prompt id from data const auto promptId = message->getPromptId(); - const auto data = message->getData(); - const auto size = message->getSize(); - - const std::string response = std::string((const char*)data, size); + const auto data = message->getData(); + const auto size = message->getSize(); + + const std::string response = std::string((const char *)data, size); // printf("[Request Manager] Received response '%s' for prompt %lu/%lu, edge '%s'.\n", response.c_str(), promptId.first, promptId.second, edge->getEdgeConfig().getName().c_str()); // Getting prompt object and removing it from the active prompt map. We've got the response now _activePromptMapMutex.lock(); - - // Check that the prompt actually exists - if (_activePromptMap.contains(promptId) == false) HICR_THROW_RUNTIME("Prompt map entry for prompt %lu/%lu not found. This must be a bug in hLLM", promptId.first, promptId.second); + + // Check that the prompt actually exists + if (_activePromptMap.contains(promptId) == false) + HICR_THROW_RUNTIME("Prompt map entry for prompt %lu/%lu not found. This must be a bug in hLLM", promptId.first, promptId.second); // Recovering prompt from the map auto prompt = _activePromptMap.at(promptId); @@ -183,41 +189,44 @@ class RequestManager final : public hLLM::Role // Setting response into the prompt now prompt->setResponse(response); - + // printf("Prompt Map Size: %lu\n", _activePromptMap.size()); // usleep(10000); // Increasing the counter // Compute the new average response per minute value now_time = Clock::now(); - + auto time_diff_sec = std::chrono::duration(now_time - prev_time).count(); const double resp_diff = double(++num_responses - prev_num_responses); - - const double avg_res_per_minute = resp_diff/time_diff_sec * 60.0; + + const double avg_res_per_minute = resp_diff / time_diff_sec * 60.0; // Update the prev_time if it was longer than a second - if(time_diff_sec > 10.0) + if (time_diff_sec > 10.0) { - prev_time = now_time; + prev_time = now_time; prev_num_responses = num_responses; } - std::string json_data = - "{\n" - " \"partition\": {\n" - " \"name\": \"partition1\",\n" // For now, fake partition value - " \"status\": \"active\",\n" - " \"num_responses\": " + std::to_string(num_responses) + ",\n" - " \"avg_responses_per_min\": " + std::to_string(avg_res_per_minute) + "\n" - " }\n" - "}"; + std::string json_data = "{\n" + " \"partition\": {\n" + " \"name\": \"partition1\",\n" // For now, fake partition value + " \"status\": \"active\",\n" + " \"num_responses\": " + + std::to_string(num_responses) + + ",\n" + " \"avg_responses_per_min\": " + + std::to_string(avg_res_per_minute) + + "\n" + " }\n" + "}"; auto res = _cli.Post("/data", json_data, "application/json"); // Error handling to check if the HTTP post was successfull - if(!res) std::cerr << "Failed to connect to server.\n"; + if (!res) std::cerr << "Failed to connect to server.\n"; } ///////////// Prompt handling service @@ -230,18 +239,18 @@ class RequestManager final : public hLLM::Role if (_pendingNewPromptsQueue.empty() == false) { // Getting next pending session to connect - const auto prompt = _pendingNewPromptsQueue.front(); - const auto promptId = prompt->getPromptId(); - const auto& promptData = prompt->getPrompt(); - + const auto prompt = _pendingNewPromptsQueue.front(); + const auto promptId = prompt->getPromptId(); + const auto &promptData = prompt->getPrompt(); + // Sending data to the partition that takes the prompt as input - const auto messageData = (const uint8_t*)promptData.data(); - const size_t messageSize = promptData.size()+1; + const auto messageData = (const uint8_t *)promptData.data(); + const size_t messageSize = promptData.size() + 1; // Interrupt service if the output edge (connecting to the entry partition) is full _promptOutputEdge->lock(); const auto isPromptOutputEdgeFull = _promptOutputEdge->isFull(messageSize); - _promptOutputEdge->unlock(); + _promptOutputEdge->unlock(); if (isPromptOutputEdgeFull == true) return; // Registering prompt @@ -264,9 +273,9 @@ class RequestManager final : public hLLM::Role _pendingNewPromptsQueue.pop(); } } - taskr::Service::serviceFc_t _promptHandlingServiceFunction = [this](){ this->promptHandlingService(); }; - taskr::Service _taskrPromptHandlingService = taskr::Service(_promptHandlingServiceFunction); - + taskr::Service::serviceFc_t _promptHandlingServiceFunction = [this]() { this->promptHandlingService(); }; + taskr::Service _taskrPromptHandlingService = taskr::Service(_promptHandlingServiceFunction); + ///////////// Session management service (no concurrent access active service map) __INLINE__ void sessionManagementServiceFunction() { @@ -277,7 +286,7 @@ class RequestManager final : public hLLM::Role { // Getting next pending session to connect auto session = _pendingSessionConnectionsQueue.front(); - + // Registering session _activeSessionMap.insert({session->getSessionId(), session}); @@ -289,10 +298,10 @@ class RequestManager final : public hLLM::Role } // Iterating over the active sessions in search for the next message to parse - for (const auto& entry : _activeSessionMap) + for (const auto &entry : _activeSessionMap) { // Getting session - const auto& session = entry.second; + const auto &session = entry.second; // Getting next prompt from the session, if any const auto prompt = session->getPrompt(); @@ -302,14 +311,14 @@ class RequestManager final : public hLLM::Role // Otherwise, process it pushPrompt(prompt); - } + } } - taskr::Service::serviceFc_t _taskrSessionManagementFunction = [this](){ this->sessionManagementServiceFunction(); }; - taskr::Service _taskrSessionManagementService = taskr::Service(_taskrSessionManagementFunction); - + taskr::Service::serviceFc_t _taskrSessionManagementFunction = [this]() { this->sessionManagementServiceFunction(); }; + taskr::Service _taskrSessionManagementService = taskr::Service(_taskrSessionManagementFunction); + // Edge to copy a prompt to a coordinator std::shared_ptr _promptOutputEdge; - + // Edge to receive a result from a coordinator std::shared_ptr _resultInputEdge; @@ -320,14 +329,14 @@ class RequestManager final : public hLLM::Role std::queue> _pendingNewPromptsQueue; // Active prompt map - std::mutex _activePromptMapMutex; + std::mutex _activePromptMapMutex; std::map> _activePromptMap; // Index within the deployment of the partition who will consume the initial prompt - configuration::Partition::partitionIndex_t _promptConsumerPartitionIdx; + configuration::Partition::partitionIndex_t _promptConsumerPartitionIdx; // Index within the deployment of the partition who will produce the prompt result - configuration::Partition::partitionIndex_t _resultProducerPartitionIdx; + configuration::Partition::partitionIndex_t _resultProducerPartitionIdx; // Global counter for session ids std::atomic _currentSessionId = 0; @@ -355,4 +364,4 @@ class RequestManager final : public hLLM::Role }; // class RequestManager -} // namespace hLLM \ No newline at end of file +} // namespace hLLM::roles \ No newline at end of file diff --git a/include/hllm/session.hpp b/include/hllm/session.hpp index 42310ad..f4a928a 100644 --- a/include/hllm/session.hpp +++ b/include/hllm/session.hpp @@ -8,9 +8,9 @@ namespace hLLM { -namespace roles +namespace roles { - class RequestManager; +class RequestManager; } class Session @@ -19,24 +19,24 @@ class Session friend class roles::RequestManager; - Session() = delete; + Session() = delete; ~Session() = default; - Session(const sessionId_t sessionId) : - _sessionId(sessionId) + Session(const sessionId_t sessionId) + : _sessionId(sessionId) { _currentMessageId = 0; } __INLINE__ const sessionId_t getSessionId() const { return _sessionId; } - - __INLINE__ std::shared_ptr createPrompt(const std::string& promptString) + + __INLINE__ std::shared_ptr createPrompt(const std::string &promptString) { // Getting and increasing message id const auto messageId = _currentMessageId++; // Creating prompt object - const auto promptId = Prompt::promptId_t({_sessionId, messageId}); + const auto promptId = Prompt::promptId_t({_sessionId, messageId}); const auto promptObject = std::make_shared(promptId, promptString); return promptObject; @@ -62,15 +62,19 @@ class Session // Getting prompt from the queue, if there's any _promptMutex.lock(); - if (_newPromptQueue.empty() == false) { prompt = _newPromptQueue.front(); _newPromptQueue.pop(); } + if (_newPromptQueue.empty() == false) + { + prompt = _newPromptQueue.front(); + _newPromptQueue.pop(); + } _promptMutex.unlock(); return prompt; } - bool _isConnected = false; + bool _isConnected = false; const sessionId_t _sessionId; - messageId_t _currentMessageId; + messageId_t _currentMessageId; // Mutual exclusion for managing prompts std::mutex _promptMutex; diff --git a/include/hllm/task.hpp b/include/hllm/task.hpp index c2dc680..df628d8 100644 --- a/include/hllm/task.hpp +++ b/include/hllm/task.hpp @@ -11,7 +11,10 @@ namespace hLLM { -namespace roles::partition { class Replica; } +namespace roles::partition +{ +class Replica; +} class Task final : public taskr::Task { @@ -23,17 +26,13 @@ class Task final : public taskr::Task Task() = delete; - Task(const hLLM::configuration::Task taskConfig, - const taskFunction_t &function, - const taskr::taskId_t taskId, - taskr::Function *entryPoint - ) + Task(const hLLM::configuration::Task taskConfig, const taskFunction_t &function, const taskr::taskId_t taskId, taskr::Function *entryPoint) : taskr::Task(taskId, entryPoint), _taskConfig(taskConfig), _function(function) { // Adding input token holders - for (const auto& input : taskConfig.getInputs()) _inputs[input] = nullptr; + for (const auto &input : taskConfig.getInputs()) _inputs[input] = nullptr; } ~Task() = default; @@ -42,11 +41,12 @@ class Task final : public taskr::Task { // Check whether the input token exists for this task if (_inputs.contains(inputName) == false) - HICR_THROW_RUNTIME("Task '%s' trying to access input '%s' which has not been declared for this task.\n", _taskConfig.getFunctionName().c_str(), inputName.c_str()); + HICR_THROW_RUNTIME("Task '%s' trying to access input '%s' which has not been declared for this task.\n", _taskConfig.getFunctionName().c_str(), inputName.c_str()); // Check whether the input has been given (sanity check) if (_inputs[inputName] == nullptr) - HICR_THROW_RUNTIME("Task '%s' trying to access input '%s' which has not been provided. This must be a bug in hLLM.\n", _taskConfig.getFunctionName().c_str(), inputName.c_str()); + HICR_THROW_RUNTIME( + "Task '%s' trying to access input '%s' which has not been provided. This must be a bug in hLLM.\n", _taskConfig.getFunctionName().c_str(), inputName.c_str()); // Then, get the token const auto token = _inputs.at(inputName); @@ -60,17 +60,16 @@ class Task final : public taskr::Task __INLINE__ void setOutput(const std::string &outputName, const std::shared_ptr &memorySlot) { - if (_outputEdges.contains(outputName) == false) + if (_outputEdges.contains(outputName) == false) HICR_THROW_RUNTIME("Task '%s' is setting output '%s' which is not defined for this task.\n", _taskConfig.getFunctionName().c_str(), outputName.c_str()); - if (_outputsSent.contains(outputName) == true) - HICR_THROW_RUNTIME("Task '%s' is setting output '%s' twice.\n", _taskConfig.getFunctionName().c_str(), outputName.c_str()); + if (_outputsSent.contains(outputName) == true) HICR_THROW_RUNTIME("Task '%s' is setting output '%s' twice.\n", _taskConfig.getFunctionName().c_str(), outputName.c_str()); // Getting edge to send the output through - const auto& outputEdge = _outputEdges[outputName]; + const auto &outputEdge = _outputEdges[outputName]; // Creating new message - const auto message = messages::Data((const uint8_t*)memorySlot->getPointer(), memorySlot->getSize(), _promptId); + const auto message = messages::Data((const uint8_t *)memorySlot->getPointer(), memorySlot->getSize(), _promptId); // Encoding message const auto rawMessage = message.encode(); @@ -89,51 +88,53 @@ class Task final : public taskr::Task private: __INLINE__ taskFunction_t getFunction() const { return _function; } - __INLINE__ const hLLM::configuration::Task& getConfig() const { return _taskConfig; } - __INLINE__ void setPartitionIdx(const configuration::Partition::partitionIndex_t partitionIdx) { _partitionIdx = partitionIdx; } - __INLINE__ void setReplicaIdx(const configuration::Replica::replicaIndex_t replicaIdx) { _replicaIdx = replicaIdx; } - __INLINE__ void setPromptId(const hLLM::Prompt::promptId_t promptId) { _promptId = promptId; } - __INLINE__ void setOutputEdge(const std::string& output, const std::shared_ptr edge) { _outputEdges[output] = edge; } + __INLINE__ const hLLM::configuration::Task &getConfig() const { return _taskConfig; } + __INLINE__ void setPartitionIdx(const configuration::Partition::partitionIndex_t partitionIdx) { _partitionIdx = partitionIdx; } + __INLINE__ void setReplicaIdx(const configuration::Replica::replicaIndex_t replicaIdx) { _replicaIdx = replicaIdx; } + __INLINE__ void setPromptId(const hLLM::Prompt::promptId_t promptId) { _promptId = promptId; } + __INLINE__ void setOutputEdge(const std::string &output, const std::shared_ptr edge) { _outputEdges[output] = edge; } __INLINE__ bool isReady() const { - for (const auto& input : _inputs) if (input.second == nullptr) return false; + for (const auto &input : _inputs) + if (input.second == nullptr) return false; return true; } __INLINE__ void verifyOutputsSent() const { // Check all outputs have been sent - for (const auto& output : _taskConfig.getOutputs()) + for (const auto &output : _taskConfig.getOutputs()) if (_outputsSent.contains(output) == false) - HICR_THROW_RUNTIME("Task '%s' did not set output '%s' before completion.\n", _taskConfig.getFunctionName().c_str(), output.c_str()); + HICR_THROW_RUNTIME("Task '%s' did not set output '%s' before completion.\n", _taskConfig.getFunctionName().c_str(), output.c_str()); } __INLINE__ void setInput(const std::string &inputName, const std::shared_ptr token) { // Check whether the input token exists for this task if (_inputs.contains(inputName) == false) - HICR_THROW_RUNTIME("Task '%s' trying to set input '%s' which has not been declared for this task. This must be a bug in hLLM.\n", _taskConfig.getFunctionName().c_str(), inputName.c_str()); + HICR_THROW_RUNTIME( + "Task '%s' trying to set input '%s' which has not been declared for this task. This must be a bug in hLLM.\n", _taskConfig.getFunctionName().c_str(), inputName.c_str()); // Check whether the input has been given (sanity check) if (_inputs[inputName] != nullptr) - HICR_THROW_RUNTIME("Task '%s' trying to set input '%s' which has already been set. This must be a bug in hLLM.\n", _taskConfig.getFunctionName().c_str(), inputName.c_str()); + HICR_THROW_RUNTIME("Task '%s' trying to set input '%s' which has already been set. This must be a bug in hLLM.\n", _taskConfig.getFunctionName().c_str(), inputName.c_str()); - _inputs[inputName] = token; + _inputs[inputName] = token; } - const hLLM::configuration::Task _taskConfig; - const taskFunction_t _function; + const hLLM::configuration::Task _taskConfig; + const taskFunction_t _function; std::map> _inputs; // A map relating an output name to the edge through which the data is sent std::map> _outputEdges; - std::set _outputsSent; + std::set _outputsSent; - hLLM::Prompt::promptId_t _promptId; + hLLM::Prompt::promptId_t _promptId; configuration::Partition::partitionIndex_t _partitionIdx; - configuration::Replica::replicaIndex_t _replicaIdx; + configuration::Replica::replicaIndex_t _replicaIdx; }; // class Task From 89840b9c773bf6bfb47a99d99289c98987ed3fcc Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 28 Apr 2026 14:11:41 +0200 Subject: [PATCH 4/4] ci: checkout repo with submodule cloning --- .github/workflows/master-test-workflow.yml | 4 +++- .github/workflows/pr-development-workflow.yml | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/master-test-workflow.yml b/.github/workflows/master-test-workflow.yml index a4781c5..30b4007 100644 --- a/.github/workflows/master-test-workflow.yml +++ b/.github/workflows/master-test-workflow.yml @@ -36,7 +36,9 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 + with: + submodules: recursive - name: Setup run: source /home/hicr/.bashrc && meson setup build -Dengines=mpi,cloudr -Db_coverage=true -DbuildTests=true -DbuildExamples=true -DcompileWarningsAsErrors=false diff --git a/.github/workflows/pr-development-workflow.yml b/.github/workflows/pr-development-workflow.yml index b2df216..5c43f2c 100644 --- a/.github/workflows/pr-development-workflow.yml +++ b/.github/workflows/pr-development-workflow.yml @@ -36,7 +36,9 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 + with: + submodules: recursive - name: Setup run: source /home/hicr/.bashrc && meson setup build -Db_coverage=true -Dengines=mpi,cloudr -DbuildTests=true -DbuildExamples=true -DcompileWarningsAsErrors=false