Variables | |
file = Path(__file__).resolve() | |
parent | |
root | |
str | dir_path = "../datasets/tables/megallen/amazon-google-structured" |
str | path_tableA = "/".join([dir_path, "table_a.csv"]) |
str | path_tableB = "/".join([dir_path, "table_b.csv"]) |
str | path_gold = "/".join([dir_path, "gold.csv"]) |
str | path_rule = "simjoin_entitymatching/blocker/rules/rules_amazon_google_structured1.txt" |
gold_graph = nx.Graph() | |
tableA = read_csv_table(path_tableA) | |
tableB = read_csv_table(path_tableB) | |
gold = read_csv_golds(path_gold, gold_graph) | |
sample_res = Path("output/buffer/sample_res.csv") | |
sample_abs_path = sample_res.resolve(strict=True) | |
sample_strategy | |
blocking_attr | |
cluster_tau | |
sample_tau | |
step2_tau | |
num_data | |
sample_tab = pd.read_csv(sample_abs_path) | |
str | id1 = str(sample_tab.loc[index, 'ltable_id']) + 'A' |
str | id2 = str(sample_tab.loc[index, 'rtable_id']) + 'B' |
index | |
blocking_attr_type | |
blocking_top_k | |
table_size | |
is_join_topk | |
is_idf_weighted | |
test_block.blocking_attr |
test_block.blocking_attr_type |
test_block.blocking_top_k |
test_block.cluster_tau |
str test_block.dir_path = "../datasets/tables/megallen/amazon-google-structured" |
test_block.file = Path(__file__).resolve() |
test_block.gold = read_csv_golds(path_gold, gold_graph) |
test_block.gold_graph = nx.Graph() |
str test_block.id1 = str(sample_tab.loc[index, 'ltable_id']) + 'A' |
str test_block.id2 = str(sample_tab.loc[index, 'rtable_id']) + 'B' |
test_block.index |
test_block.is_idf_weighted |
test_block.is_join_topk |
test_block.num_data |
test_block.parent |
test_block.path_gold = "/".join([dir_path, "gold.csv"]) |
test_block.path_rule = "simjoin_entitymatching/blocker/rules/rules_amazon_google_structured1.txt" |
test_block.path_tableA = "/".join([dir_path, "table_a.csv"]) |
test_block.path_tableB = "/".join([dir_path, "table_b.csv"]) |
test_block.root |
test_block.sample_abs_path = sample_res.resolve(strict=True) |
test_block.sample_res = Path("output/buffer/sample_res.csv") |
test_block.sample_strategy |
test_block.sample_tab = pd.read_csv(sample_abs_path) |
test_block.sample_tau |
test_block.step2_tau |
test_block.table_size |
test_block.tableA = read_csv_table(path_tableA) |
test_block.tableB = read_csv_table(path_tableB) |