-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlcs_parallel.cpp
More file actions
237 lines (192 loc) · 7.82 KB
/
lcs_parallel.cpp
File metadata and controls
237 lines (192 loc) · 7.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
#include "core/cxxopts.h"
#include "core/get_time.h"
#include "core/utils.h"
#include <iostream>
#include <vector>
#include <string>
#include <algorithm>
#include <unordered_set>
#include <functional>
#include <thread>
#define LCS_VERSION "parallel"
#define DEFAULT_NUMBER_OF_THREADS 1
#define DEFAULT_STRING_X "abcd"
#define DEFAULT_STRING_Y "acbad"
using namespace std;
struct ThreadData {
int id;
double timeTaken;
double timeWaitedAtBarrier;
ThreadData(int _id, double _timeTaken, double _timeWaitedAtBarrier)
: id(_id), timeTaken(_timeTaken), timeWaitedAtBarrier(_timeWaitedAtBarrier) {}
};
void getCellsToCalculate(vector<int>& vec, vector<vector<int>>& dp, int rows, int cols, int thread_id, uint num_threads, int diag) {
// Find start position of the diagonal
// (all threads compute the same number here)
int start_row = std::max(0, diag - (cols - 1));
int start_col = std::min(diag, cols - 1);
// number of elements in this diagonal
// (all threads calculate the same number here)
int elements_in_diagonal = std::min(start_col + 1, rows - start_row);
// number of threads that get an extra cell to calculate
// (all threads calculate same num here)
int z = elements_in_diagonal % num_threads;
// min number of cells each thread gets
// (all threads calculate same number here)
int base_count = elements_in_diagonal / num_threads;
// the starting index along the current diagonal that the current thread needs to start at
// this is the only number that is uniquely computed per thread
int start_idx = std::min(thread_id, z) * (base_count + 1) + std::max(0, thread_id - z) * (base_count);
// put the results in the vector
vec[0] = start_row + start_idx + 1;
vec[1] = start_col - start_idx + 1;
vec[2] = base_count + (thread_id < (z) ? 1 : 0);
}
mutex print_mutex;
vector<string> findLCS(const string &X, const string &Y, vector<ThreadData> &threadData, int num_threads) {
timer t1;
t1.start();
int n = X.length();
int m = Y.length();
CustomBarrier barrier(num_threads);
uint num_diagonals = n + m - 1;
// Create a DP table to store lengths of longest common subsequence
vector<vector<int>> dp(n + 1, vector<int>(m + 1));
auto threadWorker = [&](int thread_id) {
timer t;
t.start();
timer t2;
// stores info for the cells this thread needs to calculate on the current diagonal.
// updated once per diagonal.
// the first element is the starting row.
// the second element is the starting column.
// the third element is how many elements they need to calculate for the current diagonal.
vector<int> cellsToCalculate(3);
int x;
int y;
int z;
for (uint i = 0; i < num_diagonals; i++) {
// fill cellsToCalculate with the cells this thread needs to calculate
getCellsToCalculate(cellsToCalculate, dp, n, m, thread_id, num_threads, i);
// store the vector values in variables so its faster to read them below
// i have no idea if that is actually faster but it seems right lol
x = cellsToCalculate[0];
y = cellsToCalculate[1];
z = cellsToCalculate[2];
// fill dp table
for (int j = 0; j < z; j++) {
if (X[x-1] == Y[y-1]) {
dp[x][y] = dp[x - 1][y - 1] + 1;
} else {
dp[x][y] = max(dp[x - 1][y], dp[x][y - 1]);
}
x += 1;
y -= 1;
}
// total time this thread waited at the barrier
t2.start();
barrier.wait();
threadData[thread_id].timeWaitedAtBarrier += t2.stop();
}
// total time this thread spent computing DP table
threadData[thread_id].timeTaken = t.stop();
};
vector<thread> threads;
// start and stop threads
for (uint i = 0; i < num_threads; i++ ) {
threads.emplace_back(threadWorker, i);
}
for (auto &t : threads) {
t.join();
}
// Set to store unique LCS
unordered_set<string> lcsSet;
// Helper function to backtrack and find all LCS
function<void(int, int, string)> backtrack = [&](int i, int j, string currentLCS) {
if (i == 0 || j == 0) {
// If we've found a valid LCS that is non-empty, insert it
if (!currentLCS.empty()) {
lcsSet.insert(currentLCS);
}
return;
}
if (X[i - 1] == Y[j - 1]) {
// If characters match, add the character to the LCS and move diagonally
backtrack(i - 1, j - 1, X[i - 1] + currentLCS);
} else {
// If characters do not match, move in both directions (up and left)
if (dp[i - 1][j] == dp[i][j]) {
backtrack(i - 1, j, currentLCS);
}
if (dp[i][j - 1] == dp[i][j]) {
backtrack(i, j - 1, currentLCS);
}
}
};
// Start at endpoint and work backwards
backtrack(n, m, "");
// threadData[thread_id].timeTaken = t1.stop();
if (lcsSet.empty()) {
return {}; // Return an empty vector if no LCS exists
}
// Return a vector that contains all LCS
vector<string> result(lcsSet.begin(), lcsSet.end());
return result;
}
int main(int argc, char **argv) {
// Begin CLI parsing
cxxopts::Options options("lcs", "Find longest common subsequence.");
options.add_options(
"",
{
{ "t", "Number of threads", cxxopts::value<uint>()->default_value(std::to_string(DEFAULT_NUMBER_OF_THREADS)) },
{ "x", "1st sequence", cxxopts::value<string>()->default_value(DEFAULT_STRING_X) },
{ "y", "2nd sequence", cxxopts::value<string>()->default_value(DEFAULT_STRING_Y) },
}
);
auto cli = options.parse(argc, argv);
uint nThreads = cli["t"].as<uint>();
if (LCS_VERSION == "serial" && nThreads != 1) { // error when serial version > 1 thread
printf("Error : Number of threads for serial version must be 1.\n");
return 1;
}
string X = cli["x"].as<string>(); // sequence 1
string Y = cli["y"].as<string>(); // sequence 2
// End of CLI parsing
printf("LCS Version : %s\n", LCS_VERSION);
printf("Number of threads : %u\n", nThreads);
printf("Number of threads : %u\n", nThreads);
printf("Sequence X : %s\n", X.c_str());
printf("Sequence Y : %s\n", Y.c_str());
printf("Finding longest common subsequence...\n");
// --------------------------------------------------------------------------------
timer mainTimer;
mainTimer.start();
// Initialize thread data storage
vector<ThreadData> threadDataList;
for (int i = 0; i < nThreads; i++) {
threadDataList.push_back(ThreadData(i, 0.0, 0.0));
}
// Find LCS
vector<string> lcsResults = findLCS(X, Y, threadDataList, nThreads);
// --------------------------------------------------------------------------------
double timeTaken = mainTimer.stop();
// Print all found LCS sequences
size_t nSubsequences = lcsResults.size();
if (nSubsequences > 0) {
size_t lcsLength = lcsResults[0].size();
printf("LCS length : %zd\n", lcsLength);
for (const string &lcs : lcsResults) {
printf("Length %zd subsequence : %s\n", lcsLength, lcs.c_str());
}
} else {
printf("No subsequence found\n");
}
// Print thread data
printf("id, time_taken, time_waited_at_barrier\n");
for (int i = 0; i < nThreads; i++) {
printf("%d, %.4f, %.4f\n", threadDataList[i].id, threadDataList[i].timeTaken, threadDataList[i].timeWaitedAtBarrier);
}
printf("Time taken (in seconds) : %.4f\n", timeTaken);
return 0;
}