Get quick answers to your questions about the article from our AI researcher chatbot
{'id': 'https://openalex.org/W3146259567', 'doi': 'https://doi.org/10.1109/tkde.2007.250581', 'title': 'Duplicate Record Detection: A Survey', 'display_name': 'Duplicate Record Detection: A Survey', 'publication_year': 2007, 'publication_date': '2007-01-01', 'ids': {'openalex': 'https://openalex.org/W3146259567', 'doi': 'https://doi.org/10.1109/tkde.2007.250581', 'mag': '3146259567'}, 'language': 'en', 'primary_location': {'is_oa': False, 'landing_page_url': 'https://doi.org/10.1109/tkde.2007.250581', 'pdf_url': None, 'source': {'id': 'https://openalex.org/S30698027', 'display_name': 'IEEE Transactions on Knowledge and Data Engineering', 'issn_l': '1041-4347', 'issn': ['1041-4347', '1558-2191', '2326-3865'], 'is_oa': False, 'is_in_doaj': False, 'is_core': True, 'host_organization': 'https://openalex.org/P4310320439', 'host_organization_name': 'IEEE Computer Society', 'host_organization_lineage': ['https://openalex.org/P4310320439', 'https://openalex.org/P4310319808'], 'host_organization_lineage_names': ['IEEE Computer Society', 'Institute of Electrical and Electronics Engineers'], 'type': 'journal'}, 'license': None, 'license_id': None, 'version': None, 'is_accepted': False, 'is_published': False}, 'type': 'article', 'type_crossref': 'journal-article', 'indexed_in': ['crossref'], 'open_access': {'is_oa': True, 'oa_status': 'green', 'oa_url': 'http://archive.nyu.edu/bitstream/2451/14760/5/tkde2007.pdf', 'any_repository_has_fulltext': True}, 'authorships': [{'author_position': 'first', 'author': {'id': 'https://openalex.org/A5089912733', 'display_name': 'Ahmed K. Elmagarmid', 'orcid': 'https://orcid.org/0000-0002-0044-458X'}, 'institutions': [{'id': 'https://openalex.org/I219193219', 'display_name': 'Purdue University West Lafayette', 'ror': 'https://ror.org/02dqehb95', 'country_code': 'US', 'type': 'education', 'lineage': ['https://openalex.org/I219193219']}], 'countries': ['US'], 'is_corresponding': False, 'raw_author_name': 'Ahmed K. Elmagarmid', 'raw_affiliation_strings': ['Department of Computer Sciences and Cyber Center, Purdue University, West Lafayette, IN, USA'], 'affiliations': [{'raw_affiliation_string': 'Department of Computer Sciences and Cyber Center, Purdue University, West Lafayette, IN, USA', 'institution_ids': ['https://openalex.org/I219193219']}]}, {'author_position': 'middle', 'author': {'id': 'https://openalex.org/A5010731709', 'display_name': 'Panagiotis G. Ipeirotis', 'orcid': 'https://orcid.org/0000-0002-2966-7402'}, 'institutions': [{'id': 'https://openalex.org/I57206974', 'display_name': 'New York University', 'ror': 'https://ror.org/0190ak572', 'country_code': 'US', 'type': 'education', 'lineage': ['https://openalex.org/I57206974']}], 'countries': ['US'], 'is_corresponding': False, 'raw_author_name': 'Panagiotis G. Ipeirotis', 'raw_affiliation_strings': ['Department of Information, Leonard N. Stern School of Business, Operation and Management Sciences, New York University, Newyork, NY, USA'], 'affiliations': [{'raw_affiliation_string': 'Department of Information, Leonard N. Stern School of Business, Operation and Management Sciences, New York University, Newyork, NY, USA', 'institution_ids': ['https://openalex.org/I57206974']}]}, {'author_position': 'last', 'author': {'id': 'https://openalex.org/A5085113815', 'display_name': 'Vassilios S. Verykios', 'orcid': 'https://orcid.org/0000-0002-9758-0819'}, 'institutions': [{'id': 'https://openalex.org/I145722265', 'display_name': 'University of Thessaly', 'ror': 'https://ror.org/04v4g9h31', 'country_code': 'GR', 'type': 'education', 'lineage': ['https://openalex.org/I145722265']}], 'countries': ['GR'], 'is_corresponding': False, 'raw_author_name': 'Vassilios S. Verykios', 'raw_affiliation_strings': ['Department of Computer and Communication Engineering, University of Thessally, Volos, Greece'], 'affiliations': [{'raw_affiliation_string': 'Department of Computer and Communication Engineering, University of Thessally, Volos, Greece', 'institution_ids': ['https://openalex.org/I145722265']}]}], 'institution_assertions': [], 'countries_distinct_count': 2, 'institutions_distinct_count': 3, 'corresponding_author_ids': [], 'corresponding_institution_ids': [], 'apc_list': None, 'apc_paid': None, 'fwci': 40.094, 'has_fulltext': False, 'cited_by_count': 1125, 'citation_normalized_percentile': {'value': 0.999878, 'is_in_top_1_percent': True, 'is_in_top_10_percent': True}, 'cited_by_percentile_year': {'min': 99, 'max': 100}, 'biblio': {'volume': '19', 'issue': '1', 'first_page': '1', 'last_page': '16'}, 'is_retracted': False, 'is_paratext': False, 'primary_topic': {'id': 'https://openalex.org/T11719', 'display_name': 'Data Quality Assessment and Improvement', 'score': 1.0, 'subfield': {'id': 'https://openalex.org/subfields/1803', 'display_name': 'Management Science and Operations Research'}, 'field': {'id': 'https://openalex.org/fields/18', 'display_name': 'Decision Sciences'}, 'domain': {'id': 'https://openalex.org/domains/2', 'display_name': 'Social Sciences'}}, 'topics': [{'id': 'https://openalex.org/T11719', 'display_name': 'Data Quality Assessment and Improvement', 'score': 1.0, 'subfield': {'id': 'https://openalex.org/subfields/1803', 'display_name': 'Management Science and Operations Research'}, 'field': {'id': 'https://openalex.org/fields/18', 'display_name': 'Decision Sciences'}, 'domain': {'id': 'https://openalex.org/domains/2', 'display_name': 'Social Sciences'}}, {'id': 'https://openalex.org/T10764', 'display_name': 'Privacy-Preserving Techniques for Data Analysis and Machine Learning', 'score': 0.997, 'subfield': {'id': 'https://openalex.org/subfields/1702', 'display_name': 'Artificial Intelligence'}, 'field': {'id': 'https://openalex.org/fields/17', 'display_name': 'Computer Science'}, 'domain': {'id': 'https://openalex.org/domains/3', 'display_name': 'Physical Sciences'}}, {'id': 'https://openalex.org/T12016', 'display_name': 'Web Data Extraction and Crawling Techniques', 'score': 0.9968, 'subfield': {'id': 'https://openalex.org/subfields/1710', 'display_name': 'Information Systems'}, 'field': {'id': 'https://openalex.org/fields/17', 'display_name': 'Computer Science'}, 'domain': {'id': 'https://openalex.org/domains/3', 'display_name': 'Physical Sciences'}}], 'keywords': [{'id': 'https://openalex.org/keywords/duplicate-detection', 'display_name': 'Duplicate Detection', 'score': 0.660776}, {'id': 'https://openalex.org/keywords/data-records-mining', 'display_name': 'Data Records Mining', 'score': 0.523301}], 'concepts': [{'id': 'https://openalex.org/C41008148', 'wikidata': 'https://www.wikidata.org/wiki/Q21198', 'display_name': 'Computer science', 'level': 0, 'score': 0.8567642}, {'id': 'https://openalex.org/C124101348', 'wikidata': 'https://www.wikidata.org/wiki/Q172491', 'display_name': 'Data mining', 'level': 1, 'score': 0.6198318}, {'id': 'https://openalex.org/C48044578', 'wikidata': 'https://www.wikidata.org/wiki/Q727490', 'display_name': 'Scalability', 'level': 2, 'score': 0.5999039}, {'id': 'https://openalex.org/C23123220', 'wikidata': 'https://www.wikidata.org/wiki/Q816826', 'display_name': 'Information retrieval', 'level': 1, 'score': 0.53427035}, {'id': 'https://openalex.org/C165064840', 'wikidata': 'https://www.wikidata.org/wiki/Q1321061', 'display_name': 'Matching (statistics)', 'level': 2, 'score': 0.46811622}, {'id': 'https://openalex.org/C9652623', 'wikidata': 'https://www.wikidata.org/wiki/Q190109', 'display_name': 'Field (mathematics)', 'level': 2, 'score': 0.44918972}, {'id': 'https://openalex.org/C177264268', 'wikidata': 'https://www.wikidata.org/wiki/Q1514741', 'display_name': 'Set (abstract data type)', 'level': 2, 'score': 0.44595695}, {'id': 'https://openalex.org/C2780451532', 'wikidata': 'https://www.wikidata.org/wiki/Q759676', 'display_name': 'Task (project management)', 'level': 2, 'score': 0.42967287}, {'id': 'https://openalex.org/C77088390', 'wikidata': 'https://www.wikidata.org/wiki/Q8513', 'display_name': 'Database', 'level': 1, 'score': 0.30773395}, {'id': 'https://openalex.org/C105795698', 'wikidata': 'https://www.wikidata.org/wiki/Q12483', 'display_name': 'Statistics', 'level': 1, 'score': 0.0}, {'id': 'https://openalex.org/C33923547', 'wikidata': 'https://www.wikidata.org/wiki/Q395', 'display_name': 'Mathematics', 'level': 0, 'score': 0.0}, {'id': 'https://openalex.org/C187736073', 'wikidata': 'https://www.wikidata.org/wiki/Q2920921', 'display_name': 'Management', 'level': 1, 'score': 0.0}, {'id': 'https://openalex.org/C202444582', 'wikidata': 'https://www.wikidata.org/wiki/Q837863', 'display_name': 'Pure mathematics', 'level': 1, 'score': 0.0}, {'id': 'https://openalex.org/C162324750', 'wikidata': 'https://www.wikidata.org/wiki/Q8134', 'display_name': 'Economics', 'level': 0, 'score': 0.0}, {'id': 'https://openalex.org/C199360897', 'wikidata': 'https://www.wikidata.org/wiki/Q9143', 'display_name': 'Programming language', 'level': 1, 'score': 0.0}], 'mesh': [], 'locations_count': 4, 'locations': [{'is_oa': False, 'landing_page_url': 'https://doi.org/10.1109/tkde.2007.250581', 'pdf_url': None, 'source': {'id': 'https://openalex.org/S30698027', 'display_name': 'IEEE Transactions on Knowledge and Data Engineering', 'issn_l': '1041-4347', 'issn': ['1041-4347', '1558-2191', '2326-3865'], 'is_oa': False, 'is_in_doaj': False, 'is_core': True, 'host_organization': 'https://openalex.org/P4310320439', 'host_organization_name': 'IEEE Computer Society', 'host_organization_lineage': ['https://openalex.org/P4310320439', 'https://openalex.org/P4310319808'], 'host_organization_lineage_names': ['IEEE Computer Society', 'Institute of Electrical and Electronics Engineers'], 'type': 'journal'}, 'license': None, 'license_id': None, 'version': None, 'is_accepted': False, 'is_published': False}, {'is_oa': True, 'landing_page_url': 'http://archive.nyu.edu/handle/2451/14760', 'pdf_url': 'http://archive.nyu.edu/bitstream/2451/14760/5/tkde2007.pdf', 'source': {'id': 'https://openalex.org/S4306401258', 'display_name': 'The Faculty Digital Archive (New York University)', 'issn_l': None, 'issn': None, 'is_oa': True, 'is_in_doaj': False, 'is_core': False, 'host_organization': 'https://openalex.org/I57206974', 'host_organization_name': 'New York University', 'host_organization_lineage': ['https://openalex.org/I57206974'], 'host_organization_lineage_names': ['New York University'], 'type': 'repository'}, 'license': None, 'license_id': None, 'version': 'submittedVersion', 'is_accepted': False, 'is_published': False}, {'is_oa': True, 'landing_page_url': 'http://archive.nyu.edu/handle/2451/27823', 'pdf_url': 'http://archive.nyu.edu/bitstream/2451/27823/2/CeDER-PP-2007-15.pdf', 'source': {'id': 'https://openalex.org/S4306401258', 'display_name': 'The Faculty Digital Archive (New York University)', 'issn_l': None, 'issn': None, 'is_oa': True, 'is_in_doaj': False, 'is_core': False, 'host_organization': 'https://openalex.org/I57206974', 'host_organization_name': 'New York University', 'host_organization_lineage': ['https://openalex.org/I57206974'], 'host_organization_lineage_names': ['New York University'], 'type': 'repository'}, 'license': None, 'license_id': None, 'version': 'submittedVersion', 'is_accepted': False, 'is_published': False}, {'is_oa': True, 'landing_page_url': 'http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.147.3975', 'pdf_url': 'http://www.cs.purdue.edu/homes/ake/pub/survey2.pdf', 'source': {'id': 'https://openalex.org/S4306400349', 'display_name': 'CiteSeer X (The Pennsylvania State University)', 'issn_l': None, 'issn': None, 'is_oa': True, 'is_in_doaj': False, 'is_core': False, 'host_organization': 'https://openalex.org/I130769515', 'host_organization_name': 'Pennsylvania State University', 'host_organization_lineage': ['https://openalex.org/I130769515'], 'host_organization_lineage_names': ['Pennsylvania State University'], 'type': 'repository'}, 'license': None, 'license_id': None, 'version': 'submittedVersion', 'is_accepted': False, 'is_published': False}], 'best_oa_location': {'is_oa': True, 'landing_page_url': 'http://archive.nyu.edu/handle/2451/14760', 'pdf_url': 'http://archive.nyu.edu/bitstream/2451/14760/5/tkde2007.pdf', 'source': {'id': 'https://openalex.org/S4306401258', 'display_name': 'The Faculty Digital Archive (New York University)', 'issn_l': None, 'issn': None, 'is_oa': True, 'is_in_doaj': False, 'is_core': False, 'host_organization': 'https://openalex.org/I57206974', 'host_organization_name': 'New York University', 'host_organization_lineage': ['https://openalex.org/I57206974'], 'host_organization_lineage_names': ['New York University'], 'type': 'repository'}, 'license': None, 'license_id': None, 'version': 'submittedVersion', 'is_accepted': False, 'is_published': False}, 'sustainable_development_goals': [], 'grants': [], 'datasets': [], 'versions': [], 'referenced_works_count': 88, 'referenced_works': ['https://openalex.org/W1480376833', 'https://openalex.org/W1485156179', 'https://openalex.org/W1539477445', 'https://openalex.org/W1559390933', 'https://openalex.org/W1564630549', 'https://openalex.org/W1569123402', 'https://openalex.org/W1594031697', 'https://openalex.org/W1604938182', 'https://openalex.org/W1612155886', 'https://openalex.org/W1617896182', 'https://openalex.org/W1620204465', 'https://openalex.org/W1647671624', 'https://openalex.org/W1761401273', 'https://openalex.org/W1768893245', 'https://openalex.org/W1887000090', 'https://openalex.org/W1893018', 'https://openalex.org/W1934019294', 'https://openalex.org/W1943251102', 'https://openalex.org/W1977545325', 'https://openalex.org/W1981409766', 'https://openalex.org/W1982806687', 'https://openalex.org/W1995999642', 'https://openalex.org/W2001496424', 'https://openalex.org/W2004305018', 'https://openalex.org/W2006997130', 'https://openalex.org/W2010595692', 'https://openalex.org/W2023358833', 'https://openalex.org/W2023448865', 'https://openalex.org/W2029873015', 'https://openalex.org/W2030689309', 'https://openalex.org/W2034190452', 'https://openalex.org/W2036216970', 'https://openalex.org/W2036289425', 'https://openalex.org/W2038281398', 'https://openalex.org/W2043481183', 'https://openalex.org/W2043551673', 'https://openalex.org/W2045821558', 'https://openalex.org/W2048679005', 'https://openalex.org/W2052390074', 'https://openalex.org/W2052581258', 'https://openalex.org/W2052899946', 'https://openalex.org/W2054804336', 'https://openalex.org/W2065199439', 'https://openalex.org/W2067566391', 'https://openalex.org/W2069140227', 'https://openalex.org/W2074231493', 'https://openalex.org/W2076203838', 'https://openalex.org/W2085099553', 'https://openalex.org/W2087064593', 'https://openalex.org/W2097089247', 'https://openalex.org/W2097730395', 'https://openalex.org/W2102443632', 'https://openalex.org/W2102462631', 'https://openalex.org/W2105423800', 'https://openalex.org/W2106895292', 'https://openalex.org/W2111625757', 'https://openalex.org/W2116544254', 'https://openalex.org/W2124410446', 'https://openalex.org/W2127675794', 'https://openalex.org/W2129598390', 'https://openalex.org/W2133236963', 'https://openalex.org/W2135223301', 'https://openalex.org/W2135326109', 'https://openalex.org/W2141469207', 'https://openalex.org/W2141634619', 'https://openalex.org/W2142023452', 'https://openalex.org/W2150698190', 'https://openalex.org/W2152565070', 'https://openalex.org/W2154711808', 'https://openalex.org/W2158823144', 'https://openalex.org/W2159481891', 'https://openalex.org/W2161600801', 'https://openalex.org/W2161936973', 'https://openalex.org/W2163652601', 'https://openalex.org/W2170902582', 'https://openalex.org/W2171574281', 'https://openalex.org/W2914959486', 'https://openalex.org/W2999282340', 'https://openalex.org/W4212848460', 'https://openalex.org/W4230030242', 'https://openalex.org/W4230502578', 'https://openalex.org/W4236236547', 'https://openalex.org/W4250366158', 'https://openalex.org/W4252421678', 'https://openalex.org/W4254734767', 'https://openalex.org/W4300930575', 'https://openalex.org/W46452414', 'https://openalex.org/W47292847'], 'related_works': ['https://openalex.org/W97075385', 'https://openalex.org/W4235240664', 'https://openalex.org/W3151146928', 'https://openalex.org/W2965083567', 'https://openalex.org/W2757182831', 'https://openalex.org/W2389214306', 'https://openalex.org/W2095886385', 'https://openalex.org/W2089704382', 'https://openalex.org/W1983399550', 'https://openalex.org/W1838576100'], 'abstract_inverted_index': {'Often,': [0], 'in': [1, 11, 97, 133], 'the': [2, 36, 62, 107, 129, 134], 'real': [3], 'world,': [4], 'entities': [5], 'have': [6], 'two': [7], 'or': [8, 47], 'more': [9], 'representations': [10], 'databases.': [12], 'Duplicate': [13], 'records': [14, 96], 'do': [15], 'not': [16], 'share': [17], 'a': [18, 29, 58, 98, 125], 'common': [19], 'key': [20], 'and/or': [21], 'they': [22], 'contain': [23], 'errors': [24], 'that': [25, 72, 91], 'make': [26], 'duplicate': [27, 65, 88, 95, 113], 'matching': [28], 'difficult': [30], 'task.': [31], 'Errors': [32], 'are': [33, 73], 'introduced': [34], 'as': [35], 'result': [37], 'of': [38, 44, 50, 61, 87, 111, 120, 128], 'transcription': [39], 'errors,': [40], 'incomplete': [41], 'information,': [42], 'lack': [43], 'standard': [45], 'formats,': [46], 'any': [48], 'combination': [49], 'these': [51], 'factors.': [52], 'In': [53], 'this': [54], 'paper,': [55], 'we': [56, 82], 'present': [57, 83], 'thorough': [59], 'analysis': [60], 'literature': [63], 'on': [64], 'record': [66], 'detection.': [67], 'We': [68, 100, 116], 'cover': [69, 102], 'similarity': [70], 'metrics': [71], 'commonly': [74], 'used': [75], 'to': [76], 'detect': [77, 93], 'similar': [78], 'field': [79], 'entries,': [80], 'and': [81, 109, 123], 'an': [84], 'extensive': [85], 'set': [86], 'detection': [89, 114], 'algorithms': [90], 'can': [92], 'approximately': [94], 'database.': [99], 'also': [101], 'multiple': [103], 'techniques': [104], 'for': [105], 'improving': [106], 'efficiency': [108], 'scalability': [110], 'approximate': [112], 'algorithms.': [115], 'conclude': [117], 'with': [118, 124], 'coverage': [119], 'existing': [121], 'tools': [122], 'brief': [126], 'discussion': [127], 'big': [130], 'open': [131], 'problems': [132], 'area': [135]}, 'cited_by_api_url': 'https://api.openalex.org/works?filter=cites:W3146259567', 'counts_by_year': [{'year': 2024, 'cited_by_count': 26}, {'year': 2023, 'cited_by_count': 62}, {'year': 2022, 'cited_by_count': 65}, {'year': 2021, 'cited_by_count': 62}, {'year': 2020, 'cited_by_count': 71}, {'year': 2019, 'cited_by_count': 78}, {'year': 2018, 'cited_by_count': 80}, {'year': 2017, 'cited_by_count': 79}, {'year': 2016, 'cited_by_count': 69}, {'year': 2015, 'cited_by_count': 79}, {'year': 2014, 'cited_by_count': 81}, {'year': 2013, 'cited_by_count': 98}, {'year': 2012, 'cited_by_count': 82}], 'updated_date': '2024-09-14T16:33:29.416755', 'created_date': '2021-04-13'}