Entity Matching by Similarity Join
 
Loading...
Searching...
No Matches
FeatureIndex Class Reference

#include <feature_index.h>

Public Types

using Group = std::unordered_map<int, std::vector<std::string>>
 
using Groups = std::vector<std::unordered_map<int, std::vector<std::string>>>
 
using GroupToken = std::unordered_map<int, std::vector<std::vector<std::string>>>
 
using GroupTokens = std::vector<std::unordered_map<int, std::vector<std::vector<std::string>>>>
 
using GroupTokenInt = std::unordered_map<int, std::vector<std::vector<ui>>>
 
using GroupTokensInt = std::vector<std::unordered_map<int, std::vector<std::vector<ui>>>>
 
using Cluster = std::vector<std::unordered_map<std::string, int>>
 

Public Member Functions

 FeatureIndex ()=default
 
 ~FeatureIndex ()
 
 FeatureIndex (const FeatureIndex &other)=delete
 
 FeatureIndex (FeatureIndex &&other)=delete
 
int calCahceIndex (const std::string &func, const std::string &tok, int numFeature)
 
int calNumFeature (const std::string attr)
 
void globalInit (const std::vector< int > &keyNum, const std::vector< std::string > &attrs, Groups &groups, const GroupTokens &grpdlm, const GroupTokens &grpqgm, bool isCoeff=false)
 

Public Attributes

std::vector< std::string > str_gt_10w = {"name", "title", "description"}
 
std::vector< std::string > str_bt_1w_5w = {}
 
std::vector< std::string > str_bt_5w_10w = {}
 
std::vector< std::string > str_eq_1w = {"brand", "category", "manufacturer"}
 
double **** featureValCache = nullptr
 
int ** discreteCacheIdx = nullptr
 
std::vector< int > attrCahceLength
 
const int MIN_CACHED_LENGTH = 10000
 
const double LENGTH_FILTER_DET = 0.1
 
const double universalDet = LENGTH_FILTER_DET
 
const double cosLengthFilter = universalDet * universalDet
 
const double diceLengthFilter = universalDet / (2 - universalDet)
 

Member Typedef Documentation

◆ Cluster

using FeatureIndex::Cluster = std::vector<std::unordered_map<std::string, int>>

◆ Group

using FeatureIndex::Group = std::unordered_map<int, std::vector<std::string>>

◆ Groups

using FeatureIndex::Groups = std::vector<std::unordered_map<int, std::vector<std::string>>>

◆ GroupToken

using FeatureIndex::GroupToken = std::unordered_map<int, std::vector<std::vector<std::string>>>

◆ GroupTokenInt

using FeatureIndex::GroupTokenInt = std::unordered_map<int, std::vector<std::vector<ui>>>

◆ GroupTokens

using FeatureIndex::GroupTokens = std::vector<std::unordered_map<int, std::vector<std::vector<std::string>>>>

◆ GroupTokensInt

using FeatureIndex::GroupTokensInt = std::vector<std::unordered_map<int, std::vector<std::vector<ui>>>>

Constructor & Destructor Documentation

◆ FeatureIndex() [1/3]

FeatureIndex::FeatureIndex ( )
default

◆ ~FeatureIndex()

FeatureIndex::~FeatureIndex ( )
inline

◆ FeatureIndex() [2/3]

FeatureIndex::FeatureIndex ( const FeatureIndex & other)
delete

◆ FeatureIndex() [3/3]

FeatureIndex::FeatureIndex ( FeatureIndex && other)
delete

Member Function Documentation

◆ calCahceIndex()

int FeatureIndex::calCahceIndex ( const std::string & func,
const std::string & tok,
int numFeature )

◆ calNumFeature()

int FeatureIndex::calNumFeature ( const std::string attr)

◆ globalInit()

void FeatureIndex::globalInit ( const std::vector< int > & keyNum,
const std::vector< std::string > & attrs,
Groups & groups,
const GroupTokens & grpdlm,
const GroupTokens & grpqgm,
bool isCoeff = false )

Member Data Documentation

◆ attrCahceLength

std::vector<int> FeatureIndex::attrCahceLength

◆ cosLengthFilter

const double FeatureIndex::cosLengthFilter = universalDet * universalDet

◆ diceLengthFilter

const double FeatureIndex::diceLengthFilter = universalDet / (2 - universalDet)

◆ discreteCacheIdx

int** FeatureIndex::discreteCacheIdx = nullptr

◆ featureValCache

double**** FeatureIndex::featureValCache = nullptr

◆ LENGTH_FILTER_DET

const double FeatureIndex::LENGTH_FILTER_DET = 0.1

◆ MIN_CACHED_LENGTH

const int FeatureIndex::MIN_CACHED_LENGTH = 10000

◆ str_bt_1w_5w

std::vector<std::string> FeatureIndex::str_bt_1w_5w = {}

◆ str_bt_5w_10w

std::vector<std::string> FeatureIndex::str_bt_5w_10w = {}

◆ str_eq_1w

std::vector<std::string> FeatureIndex::str_eq_1w = {"brand", "category", "manufacturer"}

◆ str_gt_10w

std::vector<std::string> FeatureIndex::str_gt_10w = {"name", "title", "description"}

◆ universalDet

const double FeatureIndex::universalDet = LENGTH_FILTER_DET

The documentation for this class was generated from the following files: