Wikilinks: A Large-scale Cross-Document Coreference Corpus Labeled via Links to Wikipedia

Sameer Singh; Amarnag Subramanya; Fernando Pereira; Andrew McCallum
{'id': 'https://openalex.org/W2409706897', 'doi': None, 'title': 'Wikilinks: A Large-scale Cross-Document Coreference Corpus Labeled via Links to Wikipedia', 'display_name': 'Wikilinks: A Large-scale Cross-Document Coreference Corpus Labeled via Links to Wikipedia', 'publication_year': 2012, 'publication_date': '2012-01-01', 'ids': {'openalex': 'https://openalex.org/W2409706897', 'mag': '2409706897'}, 'language': 'en', 'primary_location': {'is_oa': False, 'landing_page_url': 'https://web.cs.umass.edu/publication/docs/2012/UM-CS-2012-015.pdf', 'pdf_url': None, 'source': None, 'license': None, 'license_id': None, 'version': None, 'is_accepted': False, 'is_published': False}, 'type': 'article', 'type_crossref': 'journal-article', 'indexed_in': [], 'open_access': {'is_oa': False, 'oa_status': 'closed', 'oa_url': None, 'any_repository_has_fulltext': False}, 'authorships': [{'author_position': 'first', 'author': {'id': 'https://openalex.org/A5005779128', 'display_name': 'Sameer Singh', 'orcid': 'https://orcid.org/0000-0003-0621-6323'}, 'institutions': [{'id': 'https://openalex.org/I24603500', 'display_name': 'University of Massachusetts Amherst', 'ror': 'https://ror.org/0072zz521', 'country_code': 'US', 'type': 'education', 'lineage': ['https://openalex.org/I24603500']}], 'countries': ['US'], 'is_corresponding': False, 'raw_author_name': 'Sameer Singh', 'raw_affiliation_strings': ['University of Massachusetts, Amherst'], 'affiliations': [{'raw_affiliation_string': 'University of Massachusetts, Amherst', 'institution_ids': ['https://openalex.org/I24603500']}]}, {'author_position': 'middle', 'author': {'id': 'https://openalex.org/A5017113212', 'display_name': 'Amarnag Subramanya', 'orcid': None}, 'institutions': [], 'countries': [], 'is_corresponding': False, 'raw_author_name': 'Amarnag Subramanya', 'raw_affiliation_strings': [], 'affiliations': []}, {'author_position': 'middle', 'author': {'id': 'https://openalex.org/A5044708805', 'display_name': 'Fernando Pereira', 'orcid': 'https://orcid.org/0000-0001-6100-947X'}, 'institutions': [], 'countries': [], 'is_corresponding': False, 'raw_author_name': 'Fernando Pereira', 'raw_affiliation_strings': [], 'affiliations': []}, {'author_position': 'last', 'author': {'id': 'https://openalex.org/A5008354502', 'display_name': 'Andrew McCallum', 'orcid': 'https://orcid.org/0000-0003-2843-6992'}, 'institutions': [], 'countries': [], 'is_corresponding': False, 'raw_author_name': 'Andrew Mccallum', 'raw_affiliation_strings': [], 'affiliations': []}], 'institution_assertions': [], 'countries_distinct_count': 1, 'institutions_distinct_count': 1, 'corresponding_author_ids': [], 'corresponding_institution_ids': [], 'apc_list': None, 'apc_paid': None, 'fwci': 6.143, 'has_fulltext': False, 'cited_by_count': 99, 'citation_normalized_percentile': {'value': 0.957287, 'is_in_top_1_percent': False, 'is_in_top_10_percent': True}, 'cited_by_percentile_year': {'min': 97, 'max': 98}, 'biblio': {'volume': None, 'issue': None, 'first_page': None, 'last_page': None}, 'is_retracted': False, 'is_paratext': False, 'primary_topic': {'id': 'https://openalex.org/T10181', 'display_name': 'Statistical Machine Translation and Natural Language Processing', 'score': 0.9999, 'subfield': {'id': 'https://openalex.org/subfields/1702', 'display_name': 'Artificial Intelligence'}, 'field': {'id': 'https://openalex.org/fields/17', 'display_name': 'Computer Science'}, 'domain': {'id': 'https://openalex.org/domains/3', 'display_name': 'Physical Sciences'}}, 'topics': [{'id': 'https://openalex.org/T10181', 'display_name': 'Statistical Machine Translation and Natural Language Processing', 'score': 0.9999, 'subfield': {'id': 'https://openalex.org/subfields/1702', 'display_name': 'Artificial Intelligence'}, 'field': {'id': 'https://openalex.org/fields/17', 'display_name': 'Computer Science'}, 'domain': {'id': 'https://openalex.org/domains/3', 'display_name': 'Physical Sciences'}}, {'id': 'https://openalex.org/T10028', 'display_name': 'Natural Language Processing', 'score': 0.9997, 'subfield': {'id': 'https://openalex.org/subfields/1702', 'display_name': 'Artificial Intelligence'}, 'field': {'id': 'https://openalex.org/fields/17', 'display_name': 'Computer Science'}, 'domain': {'id': 'https://openalex.org/domains/3', 'display_name': 'Physical Sciences'}}, {'id': 'https://openalex.org/T12478', 'display_name': 'Collaboration and Dynamics in Wikipedia Research', 'score': 0.9937, 'subfield': {'id': 'https://openalex.org/subfields/3315', 'display_name': 'Communication'}, 'field': {'id': 'https://openalex.org/fields/33', 'display_name': 'Social Sciences'}, 'domain': {'id': 'https://openalex.org/domains/2', 'display_name': 'Social Sciences'}}], 'keywords': [{'id': 'https://openalex.org/keywords/coreference', 'display_name': 'Coreference', 'score': 0.9417535}, {'id': 'https://openalex.org/keywords/hyperlink', 'display_name': 'Hyperlink', 'score': 0.69788826}, {'id': 'https://openalex.org/keywords/wikis', 'display_name': 'wikis', 'score': 0.544285}, {'id': 'https://openalex.org/keywords/entity-linking', 'display_name': 'Entity linking', 'score': 0.52914935}, {'id': 'https://openalex.org/keywords/knowledge-management', 'display_name': 'Knowledge Management', 'score': 0.518175}, {'id': 'https://openalex.org/keywords/crowdsourcing', 'display_name': 'Crowdsourcing', 'score': 0.516419}, {'id': 'https://openalex.org/keywords/named-entity-recognition', 'display_name': 'Named Entity Recognition', 'score': 0.50179}], 'concepts': [{'id': 'https://openalex.org/C28076734', 'wikidata': 'https://www.wikidata.org/wiki/Q63087', 'display_name': 'Coreference', 'level': 3, 'score': 0.9417535}, {'id': 'https://openalex.org/C41008148', 'wikidata': 'https://www.wikidata.org/wiki/Q21198', 'display_name': 'Computer science', 'level': 0, 'score': 0.81464195}, {'id': 'https://openalex.org/C30088001', 'wikidata': 'https://www.wikidata.org/wiki/Q102014', 'display_name': 'Hyperlink', 'level': 3, 'score': 0.69788826}, {'id': 'https://openalex.org/C23123220', 'wikidata': 'https://www.wikidata.org/wiki/Q816826', 'display_name': 'Information retrieval', 'level': 1, 'score': 0.6335201}, {'id': 'https://openalex.org/C2780451532', 'wikidata': 'https://www.wikidata.org/wiki/Q759676', 'display_name': 'Task (project management)', 'level': 2, 'score': 0.5927762}, {'id': 'https://openalex.org/C204321447', 'wikidata': 'https://www.wikidata.org/wiki/Q30642', 'display_name': 'Natural language processing', 'level': 1, 'score': 0.5865693}, {'id': 'https://openalex.org/C2776214188', 'wikidata': 'https://www.wikidata.org/wiki/Q408386', 'display_name': 'Inference', 'level': 2, 'score': 0.5732205}, {'id': 'https://openalex.org/C154945302', 'wikidata': 'https://www.wikidata.org/wiki/Q11660', 'display_name': 'Artificial intelligence', 'level': 1, 'score': 0.53109586}, {'id': 'https://openalex.org/C96711827', 'wikidata': 'https://www.wikidata.org/wiki/Q17012245', 'display_name': 'Entity linking', 'level': 3, 'score': 0.52914935}, {'id': 'https://openalex.org/C138268822', 'wikidata': 'https://www.wikidata.org/wiki/Q1051925', 'display_name': 'Resolution (logic)', 'level': 2, 'score': 0.48112002}, {'id': 'https://openalex.org/C4554734', 'wikidata': 'https://www.wikidata.org/wiki/Q593744', 'display_name': 'Knowledge base', 'level': 2, 'score': 0.45398206}, {'id': 'https://openalex.org/C2778755073', 'wikidata': 'https://www.wikidata.org/wiki/Q10858537', 'display_name': 'Scale (ratio)', 'level': 2, 'score': 0.4243081}, {'id': 'https://openalex.org/C136764020', 'wikidata': 'https://www.wikidata.org/wiki/Q466', 'display_name': 'World Wide Web', 'level': 1, 'score': 0.28684825}, {'id': 'https://openalex.org/C21959979', 'wikidata': 'https://www.wikidata.org/wiki/Q36774', 'display_name': 'Web page', 'level': 2, 'score': 0.1268447}, {'id': 'https://openalex.org/C205649164', 'wikidata': 'https://www.wikidata.org/wiki/Q1071', 'display_name': 'Geography', 'level': 0, 'score': 0.07753572}, {'id': 'https://openalex.org/C58640448', 'wikidata': 'https://www.wikidata.org/wiki/Q42515', 'display_name': 'Cartography', 'level': 1, 'score': 0.0}, {'id': 'https://openalex.org/C187736073', 'wikidata': 'https://www.wikidata.org/wiki/Q2920921', 'display_name': 'Management', 'level': 1, 'score': 0.0}, {'id': 'https://openalex.org/C162324750', 'wikidata': 'https://www.wikidata.org/wiki/Q8134', 'display_name': 'Economics', 'level': 0, 'score': 0.0}], 'mesh': [], 'locations_count': 1, 'locations': [{'is_oa': False, 'landing_page_url': 'https://web.cs.umass.edu/publication/docs/2012/UM-CS-2012-015.pdf', 'pdf_url': None, 'source': None, 'license': None, 'license_id': None, 'version': None, 'is_accepted': False, 'is_published': False}], 'best_oa_location': None, 'sustainable_development_goals': [{'score': 0.66, 'display_name': 'Quality education', 'id': 'https://metadata.un.org/sdg/4'}], 'grants': [], 'datasets': [], 'versions': [], 'referenced_works_count': 26, 'referenced_works': ['https://openalex.org/W11298561', 'https://openalex.org/W1502876877', 'https://openalex.org/W1510293779', 'https://openalex.org/W1789782362', 'https://openalex.org/W191584165', 'https://openalex.org/W2097734711', 'https://openalex.org/W2099982145', 'https://openalex.org/W2109215439', 'https://openalex.org/W2110630251', 'https://openalex.org/W2111535751', 'https://openalex.org/W2113013741', 'https://openalex.org/W2113227740', 'https://openalex.org/W2132679783', 'https://openalex.org/W2140037211', 'https://openalex.org/W2142313532', 'https://openalex.org/W2145453687', 'https://openalex.org/W2153911474', 'https://openalex.org/W2153947693', 'https://openalex.org/W2181629536', 'https://openalex.org/W2243237654', 'https://openalex.org/W2250750514', 'https://openalex.org/W2404643063', 'https://openalex.org/W3103362336', 'https://openalex.org/W43416609', 'https://openalex.org/W59466250', 'https://openalex.org/W86887328'], 'related_works': ['https://openalex.org/W86887328', 'https://openalex.org/W2436001372', 'https://openalex.org/W2406945108', 'https://openalex.org/W2250750514', 'https://openalex.org/W2250539671', 'https://openalex.org/W2157191138', 'https://openalex.org/W2151048449', 'https://openalex.org/W2131357087', 'https://openalex.org/W2123442489', 'https://openalex.org/W2120699290', 'https://openalex.org/W2115352105', 'https://openalex.org/W2104583100', 'https://openalex.org/W2100341149', 'https://openalex.org/W2099982145', 'https://openalex.org/W2094728533', 'https://openalex.org/W2081580037', 'https://openalex.org/W2022166150', 'https://openalex.org/W1713614699', 'https://openalex.org/W1548663377', 'https://openalex.org/W11298561'], 'abstract_inverted_index': {'Cross-document': [0], 'coreference': [1, 51], 'resolution': [2], 'is': [3, 25, 92], 'the': [4, 8, 74], 'task': [5], 'of': [6, 14, 66, 78, 126], 'grouping': [7], 'entity': [9, 132], 'mentions': [10, 81], 'in': [11], 'a': [12, 21, 59, 100], 'collection': [13], 'documents': [15], 'into': [16], 'sets': [17], 'that': [18], 'each': [19], 'represent': [20], 'distinct': [22], 'entity.': [23], 'It': [24], 'central': [26], 'to': [27, 97, 111, 122], 'knowledge': [28], 'base': [29], 'construction': [30], 'and': [31, 48, 103, 130], 'also': [32, 72], 'useful': [33], 'for': [34, 46, 61], 'joint': [35], 'inference': [36], 'with': [37], 'other': [38], 'NLP': [39], 'components.': [40], 'Obtaining': [41], 'large,': [42], 'organic': [43], 'labeled': [44, 114], 'datasets': [45], 'training': [47], 'testing': [49], 'cross-document': [50, 68], 'has': [52], 'previously': [53], 'been': [54], 'difficult.': [55], 'This': [56], 'paper': [57], 'presents': [58], 'method': [60, 91], 'automatically': [62], 'gathering': [63], 'massive': [64], 'amounts': [65], 'naturally-occurring': [67], 'reference': [69], 'data.': [70], 'We': [71], 'present': [73], 'Wikilinks': [75], 'dataset': [76], 'comprising': [77], '40': [79], 'million': [80, 84], 'over': [82], '3': [83], 'entities,': [85], 'gathered': [86], 'using': [87, 104], 'this': [88], 'method.': [89], 'Our': [90], 'based': [93], 'on': [94], 'finding': [95], 'hyperlinks': [96], 'Wikipedia': [98], 'from': [99], 'web': [101], 'crawl': [102], 'anchor': [105], 'text': [106, 127], 'as': [107], 'mentions.': [108], 'In': [109], 'addition': [110], 'providing': [112], 'large-scale': [113], 'data': [115], 'without': [116], 'human': [117], 'effort,': [118], 'we': [119], 'are': [120], 'able': [121], 'include': [123], 'many': [124, 131], 'styles': [125], 'beyond': [128, 134], 'newswire': [129], 'types': [133], 'people.': [135]}, 'cited_by_api_url': 'https://api.openalex.org/works?filter=cites:W2409706897', 'counts_by_year': [{'year': 2023, 'cited_by_count': 3}, {'year': 2022, 'cited_by_count': 4}, {'year': 2021, 'cited_by_count': 10}, {'year': 2020, 'cited_by_count': 9}, {'year': 2019, 'cited_by_count': 9}, {'year': 2018, 'cited_by_count': 14}, {'year': 2017, 'cited_by_count': 12}, {'year': 2016, 'cited_by_count': 15}, {'year': 2015, 'cited_by_count': 7}, {'year': 2014, 'cited_by_count': 8}, {'year': 2013, 'cited_by_count': 8}], 'updated_date': '2024-09-19T07:49:45.836789', 'created_date': '2016-06-24'}
Publication Information

Basic Information

Access and Citation

AI Researcher Chatbot

Primary Location

Authors

Topics

Keywords

Related Works