Entity Matching by Similarity Join
 
Loading...
Searching...
No Matches
ovlpjoin_parallel.h
Go to the documentation of this file.
1/*
2 * author: Dong Deng
3 * modified: Zhencan Peng in rutgers-db/RedPajama_Analysis
4 * modified: Yunqi Li
5 * contact: liyunqixa@gmail.com
6 */
7#ifndef _OVLP_JOIN_PARALLEL_H_
8#define _OVLP_JOIN_PARALLEL_H_
9
10#include "config.h"
11#include "type.h"
12#include "index.h"
13#include <cmath>
14#include <cstdio>
15#include <iostream>
16#include <fstream>
17#include <functional>
18#include <vector>
19#include <unordered_map>
20#include <unordered_set>
21#include <string>
22#include <algorithm>
23#include <queue>
24#include <array>
25#include <chrono>
26#include <string.h>
27#include <inttypes.h>
28#include <sys/time.h>
29#include <sys/sysinfo.h>
30#include <assert.h>
31#include <omp.h>
32
35
36struct combination_p2;
38{
39public:
40 int N{0};
41 int id{0};
42 bool completed{false};
43 std::vector<int> curr;
44
45 combination_p1(int d, int beg, const OvlpRSJoinParallel& joiner);
46 combination_p1(int d, int beg, const OvlpSelfJoinParallel &joiner);
47
48 inline int getlastcurr(const OvlpRSJoinParallel& joiner);
49 inline int getlastcurr(const OvlpSelfJoinParallel &joiner);
50
51 // compute next combination_p1
52 void next(const OvlpRSJoinParallel &joiner);
53 void print(const OvlpRSJoinParallel &joiner) const;
54 bool stepback(const int i, const OvlpRSJoinParallel &joiner);
55 bool stepback(const int i, const OvlpSelfJoinParallel &joiner);
56 void binary(const combination_p1 &value, const OvlpSelfJoinParallel &joiner);
57 void binary(const combination_p2 &value, const OvlpRSJoinParallel &joiner);
58
59 // test unit
60 bool ifsame(const std::vector<ui> &data, const OvlpRSJoinParallel &joiner);
61};
62
64{
65public:
66 int N{0};
67 int id{0};
68 bool completed{false};
69 std::vector<int> curr;
70
71 combination_p2(int d, int beg, const OvlpRSJoinParallel &joiner);
72
73 inline int getlastcurr(const OvlpRSJoinParallel &joiner);
74
75 // compute next combination_p2
76 void next(const OvlpRSJoinParallel &joiner);
77 void print(const OvlpRSJoinParallel &joiner) const;
78 bool stepback(const int i, const OvlpRSJoinParallel &joiner);
79 void binary(const combination_p2 &value, const OvlpRSJoinParallel &joiner);
80 void binary(const combination_p1 &value, const OvlpRSJoinParallel &joiner);
81
82 // test unit
83 bool ifsame(const std::vector<ui> &data, const OvlpRSJoinParallel &joiner);
84};
85
86
87/*
88 This class performs an overlap join between two datasets (set R and set S).
89 The join is implemented in parallel.
90*/
92{
93public:
94 int n1{0}, n2{0}; // R S sizes
95 int c{0}; // threshold
97
98 std::vector<std::vector<ui>> records1, records2; // two sets
99 std::vector<std::vector<ui>> datasets1, datasets2; // two working datasets
100 std::vector<double> recWeights1, recWeights2;
101 std::vector<double> wordwt;
102 std::vector<std::pair<int, int>> idmap_records1, idmap_records2;
103 std::vector<std::vector<std::pair<int, int>>> ele_lists1, ele_lists2;
104 std::vector<std::pair<int, int>> result_pairs[MAXTHREADNUM];
106 std::vector<combination_p1> combs1[MAXTHREADNUM];
107 std::vector<combination_p2> combs2[MAXTHREADNUM]; // comb
109#if MAINTAIN_VALUE_OVLP == 1
110 bool isWeightedComp{false};
111 // only build heap when the res size is greater than MAX
112 std::vector<WeightPair> result_pairs_[MAXTHREADNUM];
113 int isHeap[MAXTHREADNUM] = { 0 };
114#endif
115
117 int64_t result_num;
118
119 void overlapjoin(int overlap_threshold, std::vector<std::pair<int, int>> &finalPairs);
120 void small_case(int L1, int R1, int L2, int R2, std::vector<std::pair<int, int>> &finalPairs);
121
122 bool if_external_IO = false;
124
// Constructs an R-S joiner.
// The two record collections, their per-record weights and the per-word
// weights are copied into the members records1/records2,
// recWeights1/recWeights2 and wordwt.
// _maxHeapSize:    0 selects MAX_PAIR_SIZE, any other value is used as given.
// _isWeightedComp: only stored when MAINTAIN_VALUE_OVLP == 1.
// NOTE(review): the parameter names suggest each record is already sorted;
// the overlap helpers below run std::set_intersection on them - confirm at call sites.
125 OvlpRSJoinParallel(const std::vector<std::vector<ui>> &sorted_records_1, const std::vector<std::vector<ui>> &sorted_records_2,
126 const std::vector<double> &rec1wt, const std::vector<double> &rec2wt, const std::vector<double> &_wordwt,
127 ui _maxHeapSize = 0, bool _isWeightedComp = false)
128 : records1(sorted_records_1), records2(sorted_records_2), recWeights1(rec1wt), recWeights2(rec2wt), wordwt(_wordwt) {
129 // reset everything
130 c = 0;
131 result_num = 0;
132 candidate_num = 0;
133
// A requested size of 0 means "use the compile-time default".
134 maxHeapSize = _maxHeapSize == 0 ? MAX_PAIR_SIZE : _maxHeapSize;
135#if MAINTAIN_VALUE_OVLP == 1
136 isWeightedComp = _isWeightedComp;
// Reserve room for maxHeapSize weighted pairs in every per-thread result buffer.
137 for(int tid = 0; tid < MAXTHREADNUM; tid++)
138 result_pairs_[tid].reserve(maxHeapSize);
139#endif
140 }
141
142 double weightedOverlapCoeff(int id1, int id2) {
143 std::vector<ui> res;
144 std::set_intersection(records1[id1].begin(), records1[id1].end(),
145 records2[id2].begin(), records2[id2].end(),
146 std::back_inserter(res));
147 double ovlp = 0.0;
148 for(const auto &e : res)
149 ovlp += wordwt[e];
150 return ovlp / std::min(recWeights1[id1], recWeights2[id2]);
151 // return ovlp;
152 }
153
154 double overlapCoeff(int id1, int id2) {
155 std::vector<ui> res;
156 std::set_intersection(records1[id1].begin(), records1[id1].end(),
157 records2[id2].begin(), records2[id2].end(),
158 back_inserter(res));
159
160 double ovlp = res.size() * 1.0;
161 return ovlp / std::min(records1[id1].size(), records2[id2].size()) * 1.0;
162 // return ovlp;
163 }
164
165
// Sets if_external_IO to true and records _resPair_path in
// resultPair_storePath. This function does no I/O itself.
// NOTE(review): presumably the join later writes result pairs to that path - confirm in the .cc file.
166 void set_external_store(const std::string &_resPair_path){
167 if_external_IO = true;
168 resultPair_storePath = _resPair_path;
169 }
170
171public:
// Ordering predicate over two entries of combs1[tid], addressed by index a and b.
// The first c positions of each combination's curr are dereferenced through
// datasets1[id] and compared in order: returns true if the first differing
// element of a is smaller, false if it is larger.
// When all c elements match, returns whether a's id is greater than b's id.
172 bool comp_comb1(const int a, const int b, int tid) {
173 // cout << c << " ";
174 auto & c1 = combs1[tid][a];
175 auto & c2 = combs1[tid][b];
176 for (int i = 0; i < c; i++) {
177 if (datasets1[c1.id][c1.curr[i]] > datasets1[c2.id][c2.curr[i]])
178 return false;
179 else if (datasets1[c1.id][c1.curr[i]] < datasets1[c2.id][c2.curr[i]])
180 return true;
181 }
// Tie on all c elements: fall back to the record id.
182 return c1.id > c2.id;
183 }
// Ordering predicate over two entries of combs2[tid], addressed by index a and b.
// Same rule as comp_comb1, but the elements are looked up in datasets2:
// true if the first differing element of a is smaller, false if it is larger,
// and on a full tie over the first c positions, whether a's id is greater than b's id.
184 bool comp_comb2(const int a, const int b, int tid) {
185 auto & c1 = combs2[tid][a];
186 auto & c2 = combs2[tid][b];
187 for (int i = 0; i < c; i++) {
188 if (datasets2[c1.id][c1.curr[i]] > datasets2[c2.id][c2.curr[i]])
189 return false;
190 else if (datasets2[c1.id][c1.curr[i]] < datasets2[c2.id][c2.curr[i]])
191 return true;
192 }
// Tie on all c elements: fall back to the record id.
193 return c1.id > c2.id;
194 }
195
196public:
197 // build heap for combination_p1
198 bool build_heap(const std::vector<std::pair<int,int>> &vec, const std::vector<std::vector<ui>> &dataset,
199 int L, std::vector<int> &heap, std::vector<combination_p1> &combs, int &heap_size,
200 int tid);
201 // build heap for combination_p2
202 bool build_heap(const std::vector<std::pair<int,int>> &vec, const std::vector<std::vector<ui>> &dataset,
203 int L, std::vector<int> &heap, std::vector<combination_p2> &combs, int &heap_size,
204 int tid);
205};
206
207
209{
210public:
211 int n1{0}; // size
212 int c{0}; // threshold
215
216 std::vector<std::vector<ui>> records; // set
217 std::vector<std::vector<ui>> datasets; // working datasets
218 std::vector<double> weights;
219 std::vector<double> wordwt;
220 std::vector<std::pair<int, int>> idmap_records;
221 std::vector<std::vector<std::pair<int, int>>> ele_lists;
222 std::vector<std::pair<int, int>> result_pairs[MAXTHREADNUM];
223 std::vector<int> heap[MAXTHREADNUM];
224 std::vector<combination_p1> combs[MAXTHREADNUM]; // comb
225 std::unordered_set<int> random_ids;
226 std::vector<std::pair<int, int>> buck;
228#if MAINTAIN_VALUE_OVLP == 1
229 bool isWeightedComp{false};
230 // only build heap when the res size is greater than MAX
231 std::vector<WeightPair> result_pairs_[MAXTHREADNUM];
232 int isHeap[MAXTHREADNUM] = { 0 };
233#endif
234 int64_t candidate_num{0};
235 int64_t result_num{0};
236 int64_t list_cost{0};
237 double heap_cost{0.0};
238 double binary_cost{0.0};
239 uint64_t heap_op{0};
240 int64_t large_cost{0};
241 int64_t large_est_cost{0};
242 int alive_id{0};
243
244 void overlapjoin(int overlap_threshold, std::vector<std::pair<int, int>> &finalPairs);
245 void small_case(int L, int R, std::vector<std::pair<int, int>> &finalPairs);
246 // large case
247 int64_t small_estimate(int L, int R);
248 int64_t large_estimate(int L, int R);
249 int divide(int nL);
250 int estimate();
251 void large_case(int L, int R, std::vector<std::pair<int, int>> &finalPairs);
252
253 bool if_external_IO = false;
255
// Constructs a self-joiner.
// The record collection, the per-record weights and the per-word weights are
// copied into the members records, weights and wordwt.
// _maxHeapSize:    0 selects MAX_PAIR_SIZE, any other value is used as given.
// _isWeightedComp: only stored when MAINTAIN_VALUE_OVLP == 1.
// NOTE(review): the parameter name suggests each record is already sorted;
// the overlap helpers below run std::set_intersection on them - confirm at call sites.
256 OvlpSelfJoinParallel(const std::vector<std::vector<ui>> &sorted_records, const std::vector<double> &recwt,
257 const std::vector<double> &_wordwt, ui _maxHeapSize = 0, bool _isWeightedComp = false)
258 : records(sorted_records), weights(recwt), wordwt(_wordwt) {
259 // reset everything
260 c = 0;
261 result_num = 0;
262 candidate_num = 0;
263
// A requested size of 0 means "use the compile-time default".
264 maxHeapSize = _maxHeapSize == 0 ? MAX_PAIR_SIZE : _maxHeapSize;
265#if MAINTAIN_VALUE_OVLP == 1
266 isWeightedComp = _isWeightedComp;
// Reserve room for maxHeapSize weighted pairs in every per-thread result buffer.
267 for(int tid = 0; tid < MAXTHREADNUM; tid++)
268 result_pairs_[tid].reserve(maxHeapSize);
269#endif
270 }
271
// Returns the sum of wordwt[e] over every element e that std::set_intersection
// finds in both records[id1] and records[id2].
// NOTE(review): despite the "Coeff" name, the division by
// min(weights[id1], weights[id2]) is commented out, so this returns the raw
// weighted overlap rather than a normalised coefficient - confirm this is intended.
272 double weightedOverlapCoeff(int id1, int id2) {
273 std::vector<ui> res;
// std::set_intersection requires both input ranges to be sorted.
274 std::set_intersection(records[id1].begin(), records[id1].end(),
275 records[id2].begin(), records[id2].end(),
276 std::back_inserter(res));
277 double ovlp = 0.0;
278 for(const auto &e : res)
279 ovlp += wordwt[e];
280 // return ovlp / std::min(weights[id1], weights[id2]);
281 return ovlp;
282 }
283
284 double overlapCoeff(int id1, int id2) {
285 std::vector<ui> res;
286 std::set_intersection(records[id1].begin(), records[id1].end(),
287 records[id2].begin(), records[id2].end(),
288 back_inserter(res));
289
290 double ovlp = res.size() * 1.0;
291 return ovlp / std::min(records[id1].size(), records[id2].size()) * 1.0;
292 // return ovlp;
293 }
294
// Sets if_external_IO to true and records _resPair_path in
// resultPair_storePath. This function does no I/O itself.
// NOTE(review): presumably the join later writes result pairs to that path - confirm in the .cc file.
295 void set_external_store(const std::string &_resPair_path){
296 if_external_IO = true;
297 resultPair_storePath = _resPair_path;
298 }
299
300
301public:
// Ordering predicate over two entries of combs[tid], addressed by index a and b.
// The first c positions of each combination's curr are dereferenced through
// datasets[id] and compared in order: returns true if the first differing
// element of a is smaller, false if it is larger.
// When all c elements match, returns whether a's id is greater than b's id.
302 bool comp_comb1(const int a, const int b, int tid) {
303 // cout << c << " ";
304 auto & c1 = combs[tid][a];
305 auto & c2 = combs[tid][b];
306 for (int i = 0; i < c; i++) {
307 if (datasets[c1.id][c1.curr[i]] > datasets[c2.id][c2.curr[i]])
308 return false;
309 else if (datasets[c1.id][c1.curr[i]] < datasets[c2.id][c2.curr[i]])
310 return true;
311 }
// Tie on all c elements: fall back to the record id.
312 return c1.id > c2.id;
313 }
314
315public:
316 // build heap for combination_p1
317 bool build_heap(const std::vector<std::pair<int,int>> &vec, const std::vector<std::vector<ui>> &dataset,
318 int L, std::vector<int> &heap, std::vector<combination_p1> &combs, int &heap_size,
319 int tid);
320};
321
322
323
325{
326public:
327 OvlpUtilParallel() = default;
328 ~OvlpUtilParallel() = default;
329 OvlpUtilParallel(const OvlpUtilParallel& other) = delete;
331
332public:
// Returns true when a is strictly greater than b.
// NOTE(review): presumably used as a descending-order comparator - confirm at call sites.
333 static bool comp_int(const int a, const int b) {
334 return a > b;
335 }
// Returns true when the first member of p1 is strictly less than val.
// NOTE(review): the (element, value) argument order matches what
// std::lower_bound expects from its comparator - confirm at call sites.
336 static bool comp_pair(const std::pair<int, int> &p1, const int val) {
337 return p1.first < val;
338 }
// Returns true when two combination_p1 objects select the same element values:
// for each of the first joiner.c positions, the element of
// joiner.datasets1[id] addressed by curr[i] must be equal on both sides.
// The record ids themselves are not compared.
339 static bool is_equal(const combination_p1 & c1, const combination_p1 & c2,
340 const OvlpRSJoinParallel &joiner) {
341 for (int i = 0; i < joiner.c; i++) {
342 if (joiner.datasets1[c1.id][c1.curr[i]] != joiner.datasets1[c2.id][c2.curr[i]])
343 return false;
344 }
345 return true;
346 }
// Self-join overload: returns true when two combination_p1 objects select the
// same element values. For each of the first joiner.c positions, the element
// of joiner.datasets[id] addressed by curr[i] must be equal on both sides.
// The record ids themselves are not compared.
347 static bool is_equal(const combination_p1 & c1, const combination_p1 & c2,
348 const OvlpSelfJoinParallel &joiner) {
349 for (int i = 0; i < joiner.c; i++) {
350 if (joiner.datasets[c1.id][c1.curr[i]] != joiner.datasets[c2.id][c2.curr[i]])
351 return false;
352 }
353 return true;
354 }
// Returns true when two combination_p2 objects select the same element values:
// for each of the first joiner.c positions, the element of
// joiner.datasets2[id] addressed by curr[i] must be equal on both sides.
// The record ids themselves are not compared.
355 static bool is_equal(const combination_p2 & c1, const combination_p2 & c2,
356 const OvlpRSJoinParallel &joiner) {
357 for (int i = 0; i < joiner.c; i++) {
358 if (joiner.datasets2[c1.id][c1.curr[i]] != joiner.datasets2[c2.id][c2.curr[i]])
359 return false;
360 }
361 return true;
362 }
// Three-way comparison of a combination_p1 (elements read from
// joiner.datasets1) against a combination_p2 (elements read from
// joiner.datasets2) over the first joiner.c positions.
// Returns 1 if c1 is larger at the first differing position, -1 if it is
// smaller, and 0 if all joiner.c elements are equal.
363 static int compare(const combination_p1 & c1, const combination_p2 & c2,
364 const OvlpRSJoinParallel &joiner) {
365 // cout << joiner.c << ' ';
366 for (int i = 0; i < joiner.c; i++) {
367 if (joiner.datasets1[c1.id][c1.curr[i]] > joiner.datasets2[c2.id][c2.curr[i]])
368 return 1;
369 else if (joiner.datasets1[c1.id][c1.curr[i]] < joiner.datasets2[c2.id][c2.curr[i]])
370 return -1;
371 }
372 return 0;
373 }
// Binomial coefficient "n choose k" in exact 64-bit integer arithmetic.
//   k < 0            -> 0 (the recursive version never terminated here)
//   k == 0           -> 1
//   0 <= n < k       -> 0
//   n < 0, k > 0     -> generalised binomial n(n-1)...(n-k+1)/k!, as before
// The loop is iterative, so large k no longer costs stack depth, and the
// intermediate values stay within int64_t whenever the final result does.
static int64_t nchoosek(int64_t n, int64_t k) {
    if (k < 0)
        return 0;
    if (n >= 0) {
        if (k > n)
            return 0;
        // C(n, k) == C(n, n - k): iterate over the shorter side.
        if (k > n - k)
            k = n - k;
    }

    int64_t result = 1;
    for (int64_t i = 1; i <= k; ++i) {
        const int64_t factor = n - k + i;
        // result * factor is always divisible by i. Splitting result into
        // quotient and remainder avoids forming that full product, which
        // could overflow even though the quotient fits.
        result = (result / i) * factor + (result % i) * factor / i;
    }
    return result;
}
378
379public:
380 static void removeShort(const std::vector<std::vector<ui>> &records, std::unordered_map<ui, std::vector<int>> &ele,
381 const OvlpRSJoinParallel &joiner);
382 // Remove "widows" from a hash map based on another hash map.
383 // This function removes key-value pairs from the unordered_map 'ele'
384 // if the key doesn't exist in another unordered_map 'ele_other'.
385 static void removeWidow(std::unordered_map<ui, std::vector<int>> &ele, const std::unordered_map<ui, std::vector<int>> &ele_other);
386 static void transform(std::unordered_map<ui, std::vector<int>> &ele, const std::vector<std::pair<int, int>> &eles,
387 std::vector<std::pair<int, int>> &idmap, std::vector<std::vector<std::pair<int, int>>> &ele_lists,
388 std::vector<std::vector<ui>> &dataset, const ui total_eles, const int n, const OvlpRSJoinParallel &joiner);
389};
390
391
392
393#endif // _OVLP_JOIN_PARALLEL_H_
Definition ovlpjoin_parallel.h:92
ui maxHeapSize
Definition ovlpjoin_parallel.h:108
std::vector< int > heap2[MAXTHREADNUM]
Definition ovlpjoin_parallel.h:105
int n2
Definition ovlpjoin_parallel.h:94
int64_t result_num
Definition ovlpjoin_parallel.h:117
int n1
Definition ovlpjoin_parallel.h:94
std::vector< std::vector< std::pair< int, int > > > ele_lists2
Definition ovlpjoin_parallel.h:103
std::vector< std::vector< ui > > records1
Definition ovlpjoin_parallel.h:98
bool comp_comb2(const int a, const int b, int tid)
Definition ovlpjoin_parallel.h:184
std::vector< WeightPair > result_pairs_[MAXTHREADNUM]
Definition ovlpjoin_parallel.h:112
void set_external_store(const std::string &_resPair_path)
Definition ovlpjoin_parallel.h:166
bool isWeightedComp
Definition ovlpjoin_parallel.h:110
std::vector< std::pair< int, int > > idmap_records2
Definition ovlpjoin_parallel.h:102
int64_t candidate_num
Definition ovlpjoin_parallel.h:116
std::vector< std::pair< int, int > > idmap_records1
Definition ovlpjoin_parallel.h:102
std::vector< double > recWeights2
Definition ovlpjoin_parallel.h:100
OvlpRSJoinParallel(const std::vector< std::vector< ui > > &sorted_records_1, const std::vector< std::vector< ui > > &sorted_records_2, const std::vector< double > &rec1wt, const std::vector< double > &rec2wt, const std::vector< double > &_wordwt, ui _maxHeapSize=0, bool _isWeightedComp=false)
Definition ovlpjoin_parallel.h:125
std::vector< std::vector< ui > > datasets1
Definition ovlpjoin_parallel.h:99
std::vector< int > heap1[MAXTHREADNUM]
Definition ovlpjoin_parallel.h:105
int c
Definition ovlpjoin_parallel.h:95
ui total_eles
Definition ovlpjoin_parallel.h:96
int isHeap[MAXTHREADNUM]
Definition ovlpjoin_parallel.h:113
std::vector< std::pair< int, int > > result_pairs[MAXTHREADNUM]
Definition ovlpjoin_parallel.h:104
bool build_heap(const std::vector< std::pair< int, int > > &vec, const std::vector< std::vector< ui > > &dataset, int L, std::vector< int > &heap, std::vector< combination_p1 > &combs, int &heap_size, int tid)
Definition ovlpjoin_parallel.cc:15
std::string resultPair_storePath
Definition ovlpjoin_parallel.h:123
void overlapjoin(int overlap_threshold, std::vector< std::pair< int, int > > &finalPairs)
Definition ovlpjoin_parallel.cc:450
std::vector< combination_p1 > combs1[MAXTHREADNUM]
Definition ovlpjoin_parallel.h:106
std::vector< std::vector< ui > > records2
Definition ovlpjoin_parallel.h:98
double overlapCoeff(int id1, int id2)
Definition ovlpjoin_parallel.h:154
std::vector< std::vector< ui > > datasets2
Definition ovlpjoin_parallel.h:99
std::vector< double > wordwt
Definition ovlpjoin_parallel.h:101
double weightedOverlapCoeff(int id1, int id2)
Definition ovlpjoin_parallel.h:142
bool if_external_IO
Definition ovlpjoin_parallel.h:122
std::vector< double > recWeights1
Definition ovlpjoin_parallel.h:100
std::vector< std::vector< std::pair< int, int > > > ele_lists1
Definition ovlpjoin_parallel.h:103
void small_case(int L1, int R1, int L2, int R2, std::vector< std::pair< int, int > > &finalPairs)
Definition ovlpjoin_parallel.cc:154
bool comp_comb1(const int a, const int b, int tid)
Definition ovlpjoin_parallel.h:172
std::vector< combination_p2 > combs2[MAXTHREADNUM]
Definition ovlpjoin_parallel.h:107
Definition ovlpjoin_parallel.h:209
double binary_cost
Definition ovlpjoin_parallel.h:238
int64_t candidate_num
Definition ovlpjoin_parallel.h:234
ui total_eles
Definition ovlpjoin_parallel.h:213
std::vector< std::vector< ui > > records
Definition ovlpjoin_parallel.h:216
bool comp_comb1(const int a, const int b, int tid)
Definition ovlpjoin_parallel.h:302
void set_external_store(const std::string &_resPair_path)
Definition ovlpjoin_parallel.h:295
int isHeap[MAXTHREADNUM]
Definition ovlpjoin_parallel.h:232
bool build_heap(const std::vector< std::pair< int, int > > &vec, const std::vector< std::vector< ui > > &dataset, int L, std::vector< int > &heap, std::vector< combination_p1 > &combs, int &heap_size, int tid)
Definition ovlpjoin_parallel.cc:553
int64_t list_cost
Definition ovlpjoin_parallel.h:236
int earlyTerminated[MAXTHREADNUM]
Definition ovlpjoin_parallel.h:214
void large_case(int L, int R, std::vector< std::pair< int, int > > &finalPairs)
Definition ovlpjoin_parallel.cc:1020
int alive_id
Definition ovlpjoin_parallel.h:242
void small_case(int L, int R, std::vector< std::pair< int, int > > &finalPairs)
Definition ovlpjoin_parallel.cc:583
int64_t large_cost
Definition ovlpjoin_parallel.h:240
int estimate()
Definition ovlpjoin_parallel.cc:974
int divide(int nL)
Definition ovlpjoin_parallel.cc:962
std::vector< int > heap[MAXTHREADNUM]
Definition ovlpjoin_parallel.h:223
double overlapCoeff(int id1, int id2)
Definition ovlpjoin_parallel.h:284
OvlpSelfJoinParallel(const std::vector< std::vector< ui > > &sorted_records, const std::vector< double > &recwt, const std::vector< double > &_wordwt, ui _maxHeapSize=0, bool _isWeightedComp=false)
Definition ovlpjoin_parallel.h:256
std::vector< WeightPair > result_pairs_[MAXTHREADNUM]
Definition ovlpjoin_parallel.h:231
bool if_external_IO
Definition ovlpjoin_parallel.h:253
ui maxHeapSize
Definition ovlpjoin_parallel.h:227
std::vector< std::vector< std::pair< int, int > > > ele_lists
Definition ovlpjoin_parallel.h:221
std::vector< std::pair< int, int > > result_pairs[MAXTHREADNUM]
Definition ovlpjoin_parallel.h:222
void overlapjoin(int overlap_threshold, std::vector< std::pair< int, int > > &finalPairs)
Definition ovlpjoin_parallel.cc:1090
std::string resultPair_storePath
Definition ovlpjoin_parallel.h:254
std::vector< double > weights
Definition ovlpjoin_parallel.h:218
std::vector< double > wordwt
Definition ovlpjoin_parallel.h:219
double weightedOverlapCoeff(int id1, int id2)
Definition ovlpjoin_parallel.h:272
int64_t small_estimate(int L, int R)
Definition ovlpjoin_parallel.cc:823
int c
Definition ovlpjoin_parallel.h:212
std::vector< std::pair< int, int > > buck
Definition ovlpjoin_parallel.h:226
int n1
Definition ovlpjoin_parallel.h:211
std::vector< std::vector< ui > > datasets
Definition ovlpjoin_parallel.h:217
int64_t large_estimate(int L, int R)
Definition ovlpjoin_parallel.cc:939
uint64_t heap_op
Definition ovlpjoin_parallel.h:239
std::vector< std::pair< int, int > > idmap_records
Definition ovlpjoin_parallel.h:220
std::unordered_set< int > random_ids
Definition ovlpjoin_parallel.h:225
std::vector< combination_p1 > combs[MAXTHREADNUM]
Definition ovlpjoin_parallel.h:224
int64_t large_est_cost
Definition ovlpjoin_parallel.h:241
bool isWeightedComp
Definition ovlpjoin_parallel.h:229
double heap_cost
Definition ovlpjoin_parallel.h:237
int64_t result_num
Definition ovlpjoin_parallel.h:235
Definition ovlpjoin_parallel.h:325
OvlpUtilParallel(OvlpUtilParallel &&other)=delete
static void removeShort(const std::vector< std::vector< ui > > &records, std::unordered_map< ui, std::vector< int > > &ele, const OvlpRSJoinParallel &joiner)
Definition ovlpjoin_parallel.cc:75
static bool is_equal(const combination_p1 &c1, const combination_p1 &c2, const OvlpRSJoinParallel &joiner)
Definition ovlpjoin_parallel.h:339
static bool comp_int(const int a, const int b)
Definition ovlpjoin_parallel.h:333
static void removeWidow(std::unordered_map< ui, std::vector< int > > &ele, const std::unordered_map< ui, std::vector< int > > &ele_other)
Definition ovlpjoin_parallel.cc:90
~OvlpUtilParallel()=default
static bool is_equal(const combination_p2 &c1, const combination_p2 &c2, const OvlpRSJoinParallel &joiner)
Definition ovlpjoin_parallel.h:355
static bool is_equal(const combination_p1 &c1, const combination_p1 &c2, const OvlpSelfJoinParallel &joiner)
Definition ovlpjoin_parallel.h:347
OvlpUtilParallel(const OvlpUtilParallel &other)=delete
static int compare(const combination_p1 &c1, const combination_p2 &c2, const OvlpRSJoinParallel &joiner)
Definition ovlpjoin_parallel.h:363
static void transform(std::unordered_map< ui, std::vector< int > > &ele, const std::vector< std::pair< int, int > > &eles, std::vector< std::pair< int, int > > &idmap, std::vector< std::vector< std::pair< int, int > > > &ele_lists, std::vector< std::vector< ui > > &dataset, const ui total_eles, const int n, const OvlpRSJoinParallel &joiner)
Definition ovlpjoin_parallel.cc:110
OvlpUtilParallel()=default
static int64_t nchoosek(int64_t n, int64_t k)
Definition ovlpjoin_parallel.h:374
static bool comp_pair(const std::pair< int, int > &p1, const int val)
Definition ovlpjoin_parallel.h:336
#define MAX_PAIR_SIZE
Definition config.h:44
#define MAXTHREADNUM
Definition config.h:38
Definition ovlpjoin_parallel.h:38
std::vector< int > curr
Definition ovlpjoin_parallel.h:43
bool ifsame(const std::vector< ui > &data, const OvlpRSJoinParallel &joiner)
Definition ovlpjoin_parallel.cc:1345
int N
Definition ovlpjoin_parallel.h:40
bool stepback(const int i, const OvlpRSJoinParallel &joiner)
Definition ovlpjoin_parallel.cc:1252
combination_p1(int d, int beg, const OvlpRSJoinParallel &joiner)
Definition ovlpjoin_parallel.cc:1196
bool completed
Definition ovlpjoin_parallel.h:42
void binary(const combination_p1 &value, const OvlpSelfJoinParallel &joiner)
Definition ovlpjoin_parallel.cc:1284
void print(const OvlpRSJoinParallel &joiner) const
Definition ovlpjoin_parallel.cc:1240
int getlastcurr(const OvlpRSJoinParallel &joiner)
Definition ovlpjoin_parallel.cc:1214
void next(const OvlpRSJoinParallel &joiner)
Definition ovlpjoin_parallel.cc:1226
int id
Definition ovlpjoin_parallel.h:41
Definition ovlpjoin_parallel.h:64
int N
Definition ovlpjoin_parallel.h:66
int id
Definition ovlpjoin_parallel.h:67
bool ifsame(const std::vector< ui > &data, const OvlpRSJoinParallel &joiner)
Definition ovlpjoin_parallel.cc:1474
combination_p2(int d, int beg, const OvlpRSJoinParallel &joiner)
Definition ovlpjoin_parallel.cc:1353
void next(const OvlpRSJoinParallel &joiner)
Definition ovlpjoin_parallel.cc:1368
bool completed
Definition ovlpjoin_parallel.h:68
int getlastcurr(const OvlpRSJoinParallel &joiner)
Definition ovlpjoin_parallel.cc:1362
bool stepback(const int i, const OvlpRSJoinParallel &joiner)
Definition ovlpjoin_parallel.cc:1394
void print(const OvlpRSJoinParallel &joiner) const
Definition ovlpjoin_parallel.cc:1382
std::vector< int > curr
Definition ovlpjoin_parallel.h:69
void binary(const combination_p2 &value, const OvlpRSJoinParallel &joiner)
Definition ovlpjoin_parallel.cc:1410
unsigned int ui
Definition type.h:8