Entity Matching by Similarity Join
 
Loading...
Searching...
No Matches
feature.h
Go to the documentation of this file.
1/*
2 * author: Yunqi Li
3 * contact: liyunqixa@gmail.com
4 */
5#ifndef _FEATURE_H_
6#define _FEATURE_H_
7
8#include "common/io.h"
11#include "feature/cal_feature.h"
12#include <fstream>
13#include <sys/time.h>
14#include <omp.h>
15
16// arguments for feature api
/**
 * Arguments handed to the feature API (the extractFeatures4* entry points take
 * a `const FeatureArguments *`).
 *
 * `attributes` has 20 slots of raw C strings. The destructor calls delete[] on
 * every slot, so any pointer stored here must come from `new char[]`; unused
 * slots must stay null.
 *
 * NOTE(review): the implicitly generated copy operations duplicate the raw
 * pointers, so copying an instance would delete[] the same buffers twice.
 * Pass instances by pointer or reference.
 */
struct FeatureArguments
{
    // Zero-initialized so a freshly constructed object has a defined value.
    int totalAttr = 0;
    // All slots start as nullptr: delete[] on a null pointer is a no-op, which
    // keeps the destructor safe when fewer than 20 attributes are filled in.
    char *attributes[20] = {};

    FeatureArguments() = default;

    // Releases every attribute buffer; null slots are skipped implicitly.
    ~FeatureArguments() {
        for(int i = 0; i < 20; i++)
            delete[] attributes[i];
    }
};
28
29
31{
32public:
33 FeatureEngineering() = default;
35 FeatureEngineering(const FeatureEngineering &other) = delete;
37
38public:
39 static void readGroups(int totalAttr, const std::vector<std::string> &attrVec, FeatureIndex::Groups &group,
40 FeatureIndex::GroupTokens &groupTokensDlm, FeatureIndex::GroupTokens &groupTokensQgm,
41 FeatureIndex::Cluster &cluster, std::vector<int> &keyLength,
42 const std::string &defaultICVDir = "");
43
44 static void readFeatures(ui &numFeatures, Rule *&featureNames, std::vector<std::string> &nameCopy,
45 const std::string &defaultFeatureNamesDir = "");
46 /*
47 * extract features with interchangeable values
48 * this is used for matching with updated features
49 */
50 static void extractFeatures4Matching(int isInterchangeable, bool flagConsistent, int totalTable,
51 const FeatureArguments *attrs, const std::string &defaultFeatureVecDir = "",
52 const std::string &defaultResTableName = "", const std::string &defaultICVDir = "",
53 const std::string &defeaultFeatureNamesDir = "");
54 /*
55 * extract features for top k
56 * will use interchangeable values depending on input
57 */
58 static void extractFeatures4TopK(int isInterchangeable, bool flagConsistent, int totalTable,
59 const FeatureArguments *attrs, const std::string &defaultFeatureVecDir = "",
60 const std::string &defaultICVDir = "", const std::string &defaultFeatureNamesDir = "");
61};
62
63
64#endif // _FEATURE_H_
FeatureEngineering
Definition feature.h:31
~FeatureEngineering()=default
static void extractFeatures4Matching(int isInterchangeable, bool flagConsistent, int totalTable, const FeatureArguments *attrs, const std::string &defaultFeatureVecDir="", const std::string &defaultResTableName="", const std::string &defaultICVDir="", const std::string &defeaultFeatureNamesDir="")
Definition feature.cc:92
FeatureEngineering(FeatureEngineering &&other)=delete
FeatureEngineering(const FeatureEngineering &other)=delete
static void readFeatures(ui &numFeatures, Rule *&featureNames, std::vector< std::string > &nameCopy, const std::string &defaultFeatureNamesDir="")
Definition feature.cc:66
static void extractFeatures4TopK(int isInterchangeable, bool flagConsistent, int totalTable, const FeatureArguments *attrs, const std::string &defaultFeatureVecDir="", const std::string &defaultICVDir="", const std::string &defaultFeatureNamesDir="")
Definition feature.cc:217
static void readGroups(int totalAttr, const std::vector< std::string > &attrVec, FeatureIndex::Groups &group, FeatureIndex::GroupTokens &groupTokensDlm, FeatureIndex::GroupTokens &groupTokensQgm, FeatureIndex::Cluster &cluster, std::vector< int > &keyLength, const std::string &defaultICVDir="")
Definition feature.cc:8
FeatureEngineering()=default
std::vector< std::unordered_map< int, std::vector< std::string > > > Groups
Definition feature_index.h:26
std::vector< std::unordered_map< std::string, int > > Cluster
Definition feature_index.h:31
std::vector< std::unordered_map< int, std::vector< std::vector< std::string > > > > GroupTokens
Definition feature_index.h:28
FeatureArguments
Definition feature.h:18
int totalAttr
Definition feature.h:19
char * attributes[20]
Definition feature.h:20
FeatureArguments()=default
~FeatureArguments()
Definition feature.h:23
Definition dataframe.h:54
unsigned int ui
Definition type.h:8