Entity Matching by Similarity Join
 
Loading...
Searching...
No Matches
Tokenizer Class Reference

#include <tokenizer.h>

Public Member Functions

 Tokenizer ()=default
 
 ~Tokenizer ()=default
 
 Tokenizer (const Tokenizer &other)=delete
 
 Tokenizer (Tokenizer &&other)=delete
 

Static Public Member Functions

static void string2TokensDlm (const std::string &s, std::vector< std::string > &res, const std::string &delims)
 
static void string2TokensQGram (const std::string &s, std::vector< std::string > &res, ui q)
 
static void string2TokensWSpace (const std::string &s, std::vector< std::string > &res)
 
static void string2TokensAlphaNumeric (const std::string &s, std::vector< std::string > &res)
 
static void stringNormalize (std::string &s, ui strategy)
 
static void updateBagDlm (const Table &table, std::vector< std::vector< std::string > > &bow, ui column, const std::string &dlim, ui strategy)
 
static void updateBagQGram (const Table &table, std::vector< std::vector< std::string > > &bow, ui column, ui q)
 
static void updateBagAlphaNumeric (const Table &table, std::vector< std::vector< std::string > > &bow, ui column)
 
static void sortIdMap (std::vector< ui > &id_map, const std::vector< std::vector< ui > > &datasets)
 
static void RStableAttr2IntVector (const Table &tableA, const Table &tableB, std::vector< std::vector< ui > > &recordsA, std::vector< std::vector< ui > > &recordsB, std::vector< double > &weightsA, std::vector< double > &weightsB, std::vector< double > &wordwt, std::vector< ui > &id_mapA, std::vector< ui > &id_mapB, ui columnA, ui columnB, TokenizerType tok_type, ui &num_word, ui q)
 
static void SelftableAttr2IntVector (const Table &tableA, std::vector< std::vector< ui > > &recordsA, std::vector< double > &weightsA, std::vector< double > &wordwt, std::vector< ui > &id_mapA, ui columnA, TokenizerType tok_type, ui &num_word, ui q)
 
static void resTableAttr2IntVector (const Table &resTable, std::vector< std::vector< ui > > &recordsA, std::vector< std::vector< ui > > &recordsB, std::vector< double > &weightsA, std::vector< double > &weightsB, std::vector< double > &wordwt, ui columnA, ui columnB, TokenizerType tok_type, ui &num_word, ui q)
 

Constructor & Destructor Documentation

◆ Tokenizer() [1/3]

Tokenizer::Tokenizer ( )
default

◆ ~Tokenizer()

Tokenizer::~Tokenizer ( )
default

◆ Tokenizer() [2/3]

Tokenizer::Tokenizer ( const Tokenizer & other)
delete

◆ Tokenizer() [3/3]

Tokenizer::Tokenizer ( Tokenizer && other)
delete

Member Function Documentation

◆ resTableAttr2IntVector()

void Tokenizer::resTableAttr2IntVector ( const Table & resTable,
std::vector< std::vector< ui > > & recordsA,
std::vector< std::vector< ui > > & recordsB,
std::vector< double > & weightsA,
std::vector< double > & weightsB,
std::vector< double > & wordwt,
ui columnA,
ui columnB,
TokenizerType tok_type,
ui & num_word,
ui q )
static

◆ RStableAttr2IntVector()

void Tokenizer::RStableAttr2IntVector ( const Table & tableA,
const Table & tableB,
std::vector< std::vector< ui > > & recordsA,
std::vector< std::vector< ui > > & recordsB,
std::vector< double > & weightsA,
std::vector< double > & weightsB,
std::vector< double > & wordwt,
std::vector< ui > & id_mapA,
std::vector< ui > & id_mapB,
ui columnA,
ui columnB,
TokenizerType tok_type,
ui & num_word,
ui q )
static

◆ SelftableAttr2IntVector()

void Tokenizer::SelftableAttr2IntVector ( const Table & tableA,
std::vector< std::vector< ui > > & recordsA,
std::vector< double > & weightsA,
std::vector< double > & wordwt,
std::vector< ui > & id_mapA,
ui columnA,
TokenizerType tok_type,
ui & num_word,
ui q )
static

◆ sortIdMap()

void Tokenizer::sortIdMap ( std::vector< ui > & id_map,
const std::vector< std::vector< ui > > & datasets )
static

◆ string2TokensAlphaNumeric()

void Tokenizer::string2TokensAlphaNumeric ( const std::string & s,
std::vector< std::string > & res )
static

◆ string2TokensDlm()

void Tokenizer::string2TokensDlm ( const std::string & s,
std::vector< std::string > & res,
const std::string & delims )
static

◆ string2TokensQGram()

void Tokenizer::string2TokensQGram ( const std::string & s,
std::vector< std::string > & res,
ui q )
static

◆ string2TokensWSpace()

void Tokenizer::string2TokensWSpace ( const std::string & s,
std::vector< std::string > & res )
static

◆ stringNormalize()

void Tokenizer::stringNormalize ( std::string & s,
ui strategy )
static

◆ updateBagAlphaNumeric()

void Tokenizer::updateBagAlphaNumeric ( const Table & table,
std::vector< std::vector< std::string > > & bow,
ui column )
static

◆ updateBagDlm()

void Tokenizer::updateBagDlm ( const Table & table,
std::vector< std::vector< std::string > > & bow,
ui column,
const std::string & dlim,
ui strategy )
static

◆ updateBagQGram()

void Tokenizer::updateBagQGram ( const Table & table,
std::vector< std::vector< std::string > > & bow,
ui column,
ui q )
static

The documentation for this class was generated from the following files: