Entity Matching by Similarity Join
Loading...
Searching...
No Matches
Namespaces
|
Variables
test_block.py File Reference
Namespaces
namespace
test_block
Variables
test_block.file
= Path(__file__).resolve()
test_block.parent
test_block.root
str
test_block.dir_path
= "../datasets/tables/megallen/amazon-google-structured"
str
test_block.path_tableA
= "/".join([dir_path, "table_a.csv"])
str
test_block.path_tableB
= "/".join([dir_path, "table_b.csv"])
str
test_block.path_gold
= "/".join([dir_path, "gold.csv"])
str
test_block.path_rule
= "simjoin_entitymatching/blocker/rules/rules_amazon_google_structured1.txt"
test_block.gold_graph
= nx.Graph()
test_block.tableA
= read_csv_table(path_tableA)
test_block.tableB
= read_csv_table(path_tableB)
test_block.gold
= read_csv_golds(path_gold, gold_graph)
test_block.sample_res
= Path("output/buffer/sample_res.csv")
test_block.sample_abs_path
= sample_res.resolve(strict=True)
test_block.sample_strategy
test_block.blocking_attr
test_block.cluster_tau
test_block.sample_tau
test_block.step2_tau
test_block.num_data
test_block.sample_tab
= pd.read_csv(sample_abs_path)
str
test_block.id1
= str(sample_tab.loc[index, 'ltable_id']) + 'A'
str
test_block.id2
= str(sample_tab.loc[index, 'rtable_id']) + 'B'
test_block.index
test_block.blocking_attr_type
test_block.blocking_top_k
test_block.table_size
test_block.is_join_topk
test_block.is_idf_weighted
test
test_block.py
Generated by
1.12.0