-- SQL*Plus display settings: 100-character lines, and both input columns
-- rendered 12 characters wide so each result row fits on one line.
set linesize 100
column col1 format a12
column col2 format a12
-- Compare col1 with col2 using every fuzzy_match algorithm, with no scaling
-- option supplied (i.e. the function's default scoring).
select
    col1,
    col2,
    fuzzy_match(levenshtein,              col1, col2) as levenshtein,
    fuzzy_match(jaro_winkler,             col1, col2) as jaro_winkler,
    fuzzy_match(bigram,                   col1, col2) as bigram,
    fuzzy_match(trigram,                  col1, col2) as trigram,
    fuzzy_match(whole_word_match,         col1, col2) as wwm,
    fuzzy_match(longest_common_substring, col1, col2) as lcs
from
    match_tab;
COL1 COL2 LEVENSHTEIN JARO_WINKLER BIGRAM TRIGRAM WWM LCS
------------ ------------ ----------- ------------ ---------- ---------- ---------- ----------
Peter Parker Pete Parker 92 92 90 70 50 58
Peter Parker peter parker 84 88 72 60 0 41
Clark Kent Claire Kent 82 90 60 44 50 45
Wonder Woman Ponder Woman 92 94 100 90 50 91
Superman Superman 100 100 100 100 100 100
The Hulk Iron Man 0 41 0 0 0 12
6 rows selected.
SQL>
-- Same comparison of col1 against col2, but every algorithm is called with
-- the unscaled option, so each column shows that algorithm's raw measure.
-- NOTE(review): the meaning of each raw value differs per algorithm --
-- confirm against the FUZZY_MATCH documentation before relying on it.
select
    col1,
    col2,
    fuzzy_match(levenshtein,              col1, col2, unscaled) as levenshtein,
    fuzzy_match(jaro_winkler,             col1, col2, unscaled) as jaro_winkler,
    fuzzy_match(bigram,                   col1, col2, unscaled) as bigram,
    fuzzy_match(trigram,                  col1, col2, unscaled) as trigram,
    fuzzy_match(whole_word_match,         col1, col2, unscaled) as wwm,
    fuzzy_match(longest_common_substring, col1, col2, unscaled) as lcs
from
    match_tab;
COL1 COL2 LEVENSHTEIN JARO_WINKLER BIGRAM TRIGRAM WWM LCS
------------ ------------ ----------- ------------ ---------- ---------- ---------- ----------
Peter Parker Pete Parker 1 .92 10 7 1 7
Peter Parker peter parker 2 .88 8 6 0 5
Clark Kent Claire Kent 2 .9 6 4 1 5
Wonder Woman Ponder Woman 1 .94 11 9 1 11
Superman Superman 0 1 7 6 1 8
The Hulk Iron Man 8 .41 0 0 0 1
6 rows selected.
SQL>
-- Same comparison of col1 against col2, with every algorithm called using
-- the relate_to_shorter option.
-- NOTE(review): presumably this scores relative to the shorter of the two
-- strings -- confirm against the FUZZY_MATCH documentation.
select
    col1,
    col2,
    fuzzy_match(levenshtein,              col1, col2, relate_to_shorter) as levenshtein,
    fuzzy_match(jaro_winkler,             col1, col2, relate_to_shorter) as jaro_winkler,
    fuzzy_match(bigram,                   col1, col2, relate_to_shorter) as bigram,
    fuzzy_match(trigram,                  col1, col2, relate_to_shorter) as trigram,
    fuzzy_match(whole_word_match,         col1, col2, relate_to_shorter) as wwm,
    fuzzy_match(longest_common_substring, col1, col2, relate_to_shorter) as lcs
from
    match_tab;
COL1 COL2 LEVENSHTEIN JARO_WINKLER BIGRAM TRIGRAM WWM LCS
------------ ------------ ----------- ------------ ---------- ---------- ---------- ----------
Peter Parker Pete Parker 91 92 100 77 50 63
Peter Parker peter parker 84 88 72 60 0 41
Clark Kent Claire Kent 80 90 66 50 50 50
Wonder Woman Ponder Woman 92 94 100 90 50 91
Superman Superman 100 100 100 100 100 100
The Hulk Iron Man 0 41 0 0 0 12
6 rows selected.
SQL>
-- whole_word_match only: the plain call side by side with two edit_tolerance
-- settings (20 and 82) so their effect on the score can be compared per row.
-- NOTE(review): exact edit_tolerance semantics are not visible here --
-- confirm against the FUZZY_MATCH documentation.
select
    col1,
    col2,
    fuzzy_match(whole_word_match, col1, col2)                    as wwm,
    fuzzy_match(whole_word_match, col1, col2, edit_tolerance 20) as wwm20,
    fuzzy_match(whole_word_match, col1, col2, edit_tolerance 82) as wwm82
from
    match_tab;
COL1 COL2 WWM WWM20 WWM82
------------ ------------ ---------- ---------- ----------
Peter Parker Pete Parker 50 100 50
Peter Parker peter parker 0 100 50
Clark Kent Claire Kent 50 100 50
Wonder Woman Ponder Woman 50 100 100
Superman Superman 100 100 100
The Hulk Iron Man 0 0 0
6 rows selected.
SQL>