Entity Matching by Similarity Join
 
Loading...
Searching...
No Matches
test_block Namespace Reference

Variables

 file = Path(__file__).resolve()
 
 parent
 
 root
 
str dir_path = "../datasets/tables/megallen/amazon-google-structured"
 
str path_tableA = "/".join([dir_path, "table_a.csv"])
 
str path_tableB = "/".join([dir_path, "table_b.csv"])
 
str path_gold = "/".join([dir_path, "gold.csv"])
 
str path_rule = "simjoin_entitymatching/blocker/rules/rules_amazon_google_structured1.txt"
 
 gold_graph = nx.Graph()
 
 tableA = read_csv_table(path_tableA)
 
 tableB = read_csv_table(path_tableB)
 
 gold = read_csv_golds(path_gold, gold_graph)
 
 sample_res = Path("output/buffer/sample_res.csv")
 
 sample_abs_path = sample_res.resolve(strict=True)
 
 sample_strategy
 
 blocking_attr
 
 cluster_tau
 
 sample_tau
 
 step2_tau
 
 num_data
 
 sample_tab = pd.read_csv(sample_abs_path)
 
str id1 = str(sample_tab.loc[index, 'ltable_id']) + 'A'
 
str id2 = str(sample_tab.loc[index, 'rtable_id']) + 'B'
 
 index
 
 blocking_attr_type
 
 blocking_top_k
 
 table_size
 
 is_join_topk
 
 is_idf_weighted
 

Variable Documentation

◆ blocking_attr

test_block.blocking_attr

◆ blocking_attr_type

test_block.blocking_attr_type

◆ blocking_top_k

test_block.blocking_top_k

◆ cluster_tau

test_block.cluster_tau

◆ dir_path

str test_block.dir_path = "../datasets/tables/megallen/amazon-google-structured"

◆ file

test_block.file = Path(__file__).resolve()

◆ gold

test_block.gold = read_csv_golds(path_gold, gold_graph)

◆ gold_graph

test_block.gold_graph = nx.Graph()

◆ id1

str test_block.id1 = str(sample_tab.loc[index, 'ltable_id']) + 'A'

◆ id2

str test_block.id2 = str(sample_tab.loc[index, 'rtable_id']) + 'B'

◆ index

test_block.index

◆ is_idf_weighted

test_block.is_idf_weighted

◆ is_join_topk

test_block.is_join_topk

◆ num_data

test_block.num_data

◆ parent

test_block.parent

◆ path_gold

str test_block.path_gold = "/".join([dir_path, "gold.csv"])

◆ path_rule

str test_block.path_rule = "simjoin_entitymatching/blocker/rules/rules_amazon_google_structured1.txt"

◆ path_tableA

str test_block.path_tableA = "/".join([dir_path, "table_a.csv"])

◆ path_tableB

str test_block.path_tableB = "/".join([dir_path, "table_b.csv"])

◆ root

test_block.root

◆ sample_abs_path

test_block.sample_abs_path = sample_res.resolve(strict=True)

◆ sample_res

test_block.sample_res = Path("output/buffer/sample_res.csv")

◆ sample_strategy

test_block.sample_strategy

◆ sample_tab

test_block.sample_tab = pd.read_csv(sample_abs_path)

◆ sample_tau

test_block.sample_tau

◆ step2_tau

test_block.step2_tau

◆ table_size

test_block.table_size

◆ tableA

test_block.tableA = read_csv_table(path_tableA)

◆ tableB

test_block.tableB = read_csv_table(path_tableB)