Get quick answers to your questions about the article from our AI researcher chatbot
{'id': 'https://openalex.org/W2751367346', 'doi': 'https://doi.org/10.1016/j.ins.2017.08.096', 'title': 'How “small” reflects “large”?—Representative information measurement and extraction', 'display_name': 'How “small” reflects “large”?—Representative information measurement and extraction', 'publication_year': 2017, 'publication_date': '2017-09-05', 'ids': {'openalex': 'https://openalex.org/W2751367346', 'doi': 'https://doi.org/10.1016/j.ins.2017.08.096', 'mag': '2751367346'}, 'language': 'en', 'primary_location': {'is_oa': False, 'landing_page_url': 'https://doi.org/10.1016/j.ins.2017.08.096', 'pdf_url': None, 'source': {'id': 'https://openalex.org/S192650101', 'display_name': 'Information Sciences', 'issn_l': '0020-0255', 'issn': ['0020-0255', '1872-6291'], 'is_oa': False, 'is_in_doaj': False, 'is_core': True, 'host_organization': 'https://openalex.org/P4310320990', 'host_organization_name': 'Elsevier BV', 'host_organization_lineage': ['https://openalex.org/P4310320990'], 'host_organization_lineage_names': ['Elsevier BV'], 'type': 'journal'}, 'license': None, 'license_id': None, 'version': None, 'is_accepted': False, 'is_published': False}, 'type': 'article', 'type_crossref': 'journal-article', 'indexed_in': ['crossref'], 'open_access': {'is_oa': False, 'oa_status': 'closed', 'oa_url': None, 'any_repository_has_fulltext': False}, 'authorships': [{'author_position': 'first', 'author': {'id': 'https://openalex.org/A5100342994', 'display_name': 'Guoqing Chen', 'orcid': 'https://orcid.org/0000-0001-7386-1133'}, 'institutions': [{'id': 'https://openalex.org/I99065089', 'display_name': 'Tsinghua University', 'ror': 'https://ror.org/03cve4549', 'country_code': 'CN', 'type': 'education', 'lineage': ['https://openalex.org/I99065089']}], 'countries': ['CN'], 'is_corresponding': False, 'raw_author_name': 'Guoqing Chen', 'raw_affiliation_strings': ['China Retail Research Center, School of Economics and Management, Tsinghua University, Beijing 100084, China'], 'affiliations': [{'raw_affiliation_string': 'China Retail Research Center, School of Economics and Management, Tsinghua University, Beijing 100084, China', 'institution_ids': ['https://openalex.org/I99065089']}]}, {'author_position': 'middle', 'author': {'id': 'https://openalex.org/A5100390490', 'display_name': 'Cong Wang', 'orcid': 'https://orcid.org/0000-0002-5300-0122'}, 'institutions': [{'id': 'https://openalex.org/I99065089', 'display_name': 'Tsinghua University', 'ror': 'https://ror.org/03cve4549', 'country_code': 'CN', 'type': 'education', 'lineage': ['https://openalex.org/I99065089']}], 'countries': ['CN'], 'is_corresponding': False, 'raw_author_name': 'Cong Wang', 'raw_affiliation_strings': ['China Retail Research Center, School of Economics and Management, Tsinghua University, Beijing 100084, China'], 'affiliations': [{'raw_affiliation_string': 'China Retail Research Center, School of Economics and Management, Tsinghua University, Beijing 100084, China', 'institution_ids': ['https://openalex.org/I99065089']}]}, {'author_position': 'middle', 'author': {'id': 'https://openalex.org/A5100405278', 'display_name': 'Mingyue Zhang', 'orcid': 'https://orcid.org/0000-0002-2198-1747'}, 'institutions': [{'id': 'https://openalex.org/I109173049', 'display_name': 'Beijing Foreign Studies University', 'ror': 'https://ror.org/00jdr0662', 'country_code': 'CN', 'type': 'education', 'lineage': ['https://openalex.org/I109173049']}, {'id': 'https://openalex.org/I99065089', 'display_name': 'Tsinghua University', 'ror': 'https://ror.org/03cve4549', 'country_code': 'CN', 'type': 'education', 'lineage': ['https://openalex.org/I99065089']}], 'countries': ['CN'], 'is_corresponding': True, 'raw_author_name': 'Mingyue Zhang', 'raw_affiliation_strings': ['China Retail Research Center, School of Economics and Management, Tsinghua University, Beijing 100084, China', 'International Business School, Beijing Foreign Studies University, Beijing 100089, China'], 'affiliations': [{'raw_affiliation_string': 'International Business School, Beijing Foreign Studies University, Beijing 100089, China', 'institution_ids': ['https://openalex.org/I109173049']}, {'raw_affiliation_string': 'China Retail Research Center, School of Economics and Management, Tsinghua University, Beijing 100084, China', 'institution_ids': ['https://openalex.org/I99065089']}]}, {'author_position': 'middle', 'author': {'id': 'https://openalex.org/A5060658955', 'display_name': 'Qiang Wei', 'orcid': 'https://orcid.org/0000-0002-8397-7129'}, 'institutions': [{'id': 'https://openalex.org/I99065089', 'display_name': 'Tsinghua University', 'ror': 'https://ror.org/03cve4549', 'country_code': 'CN', 'type': 'education', 'lineage': ['https://openalex.org/I99065089']}], 'countries': ['CN'], 'is_corresponding': False, 'raw_author_name': 'Qiang Wei', 'raw_affiliation_strings': ['China Retail Research Center, School of Economics and Management, Tsinghua University, Beijing 100084, China'], 'affiliations': [{'raw_affiliation_string': 'China Retail Research Center, School of Economics and Management, Tsinghua University, Beijing 100084, China', 'institution_ids': ['https://openalex.org/I99065089']}]}, {'author_position': 'last', 'author': {'id': 'https://openalex.org/A5003795905', 'display_name': 'Baojun Ma', 'orcid': 'https://orcid.org/0000-0002-2274-3089'}, 'institutions': [{'id': 'https://openalex.org/I139759216', 'display_name': 'Beijing University of Posts and Telecommunications', 'ror': 'https://ror.org/04w9fbh59', 'country_code': 'CN', 'type': 'education', 'lineage': ['https://openalex.org/I139759216']}], 'countries': ['CN'], 'is_corresponding': False, 'raw_author_name': 'Baojun Ma', 'raw_affiliation_strings': ['School of Economics and Management, Beijing University of Posts and Telecommunications, Beijing 100876, China'], 'affiliations': [{'raw_affiliation_string': 'School of Economics and Management, Beijing University of Posts and Telecommunications, Beijing 100876, China', 'institution_ids': ['https://openalex.org/I139759216']}]}], 'institution_assertions': [], 'countries_distinct_count': 1, 'institutions_distinct_count': 3, 'corresponding_author_ids': ['https://openalex.org/A5100405278'], 'corresponding_institution_ids': ['https://openalex.org/I109173049', 'https://openalex.org/I99065089'], 'apc_list': {'value': 3330, 'currency': 'USD', 'value_usd': 3330, 'provenance': 'doaj'}, 'apc_paid': None, 'fwci': 0.0, 'has_fulltext': False, 'cited_by_count': 6, 'citation_normalized_percentile': {'value': 0.723996, 'is_in_top_1_percent': False, 'is_in_top_10_percent': False}, 'cited_by_percentile_year': {'min': 81, 'max': 83}, 'biblio': {'volume': '460-461', 'issue': None, 'first_page': '519', 'last_page': '540'}, 'is_retracted': False, 'is_paratext': False, 'primary_topic': {'id': 'https://openalex.org/T11719', 'display_name': 'Data Quality Assessment and Improvement', 'score': 0.9998, 'subfield': {'id': 'https://openalex.org/subfields/1803', 'display_name': 'Management Science and Operations Research'}, 'field': {'id': 'https://openalex.org/fields/18', 'display_name': 'Decision Sciences'}, 'domain': {'id': 'https://openalex.org/domains/2', 'display_name': 'Social Sciences'}}, 'topics': [{'id': 'https://openalex.org/T11719', 'display_name': 'Data Quality Assessment and Improvement', 'score': 0.9998, 'subfield': {'id': 'https://openalex.org/subfields/1803', 'display_name': 'Management Science and Operations Research'}, 'field': {'id': 'https://openalex.org/fields/18', 'display_name': 'Decision Sciences'}, 'domain': {'id': 'https://openalex.org/domains/2', 'display_name': 'Social Sciences'}}, {'id': 'https://openalex.org/T12016', 'display_name': 'Web Data Extraction and Crawling Techniques', 'score': 0.998, 'subfield': {'id': 'https://openalex.org/subfields/1710', 'display_name': 'Information Systems'}, 'field': {'id': 'https://openalex.org/fields/17', 'display_name': 'Computer Science'}, 'domain': {'id': 'https://openalex.org/domains/3', 'display_name': 'Physical Sciences'}}, {'id': 'https://openalex.org/T10317', 'display_name': 'Data Stream Management Systems and Techniques', 'score': 0.9964, 'subfield': {'id': 'https://openalex.org/subfields/1705', 'display_name': 'Computer Networks and Communications'}, 'field': {'id': 'https://openalex.org/fields/17', 'display_name': 'Computer Science'}, 'domain': {'id': 'https://openalex.org/domains/3', 'display_name': 'Physical Sciences'}}], 'keywords': [{'id': 'https://openalex.org/keywords/web-data-extraction', 'display_name': 'Web Data Extraction', 'score': 0.550925}, {'id': 'https://openalex.org/keywords/data-extraction', 'display_name': 'Data extraction', 'score': 0.5350806}, {'id': 'https://openalex.org/keywords/data-set', 'display_name': 'Data set', 'score': 0.42658192}], 'concepts': [{'id': 'https://openalex.org/C37381756', 'wikidata': 'https://www.wikidata.org/wiki/Q20203288', 'display_name': 'Representativeness heuristic', 'level': 2, 'score': 0.9641898}, {'id': 'https://openalex.org/C41008148', 'wikidata': 'https://www.wikidata.org/wiki/Q21198', 'display_name': 'Computer science', 'level': 0, 'score': 0.7908344}, {'id': 'https://openalex.org/C195807954', 'wikidata': 'https://www.wikidata.org/wiki/Q1662562', 'display_name': 'Information extraction', 'level': 2, 'score': 0.6621785}, {'id': 'https://openalex.org/C177264268', 'wikidata': 'https://www.wikidata.org/wiki/Q1514741', 'display_name': 'Set (abstract data type)', 'level': 2, 'score': 0.5879859}, {'id': 'https://openalex.org/C2779343474', 'wikidata': 'https://www.wikidata.org/wiki/Q3109175', 'display_name': 'Context (archaeology)', 'level': 2, 'score': 0.5779958}, {'id': 'https://openalex.org/C2777466982', 'wikidata': 'https://www.wikidata.org/wiki/Q5227287', 'display_name': 'Data extraction', 'level': 3, 'score': 0.5350806}, {'id': 'https://openalex.org/C23123220', 'wikidata': 'https://www.wikidata.org/wiki/Q816826', 'display_name': 'Information retrieval', 'level': 1, 'score': 0.51884115}, {'id': 'https://openalex.org/C124101348', 'wikidata': 'https://www.wikidata.org/wiki/Q172491', 'display_name': 'Data mining', 'level': 1, 'score': 0.49579224}, {'id': 'https://openalex.org/C2779530757', 'wikidata': 'https://www.wikidata.org/wiki/Q1207505', 'display_name': 'Quality (philosophy)', 'level': 2, 'score': 0.48806703}, {'id': 'https://openalex.org/C24756922', 'wikidata': 'https://www.wikidata.org/wiki/Q1757694', 'display_name': 'Data quality', 'level': 3, 'score': 0.44039312}, {'id': 'https://openalex.org/C58489278', 'wikidata': 'https://www.wikidata.org/wiki/Q1172284', 'display_name': 'Data set', 'level': 2, 'score': 0.42658192}, {'id': 'https://openalex.org/C2522767166', 'wikidata': 'https://www.wikidata.org/wiki/Q2374463', 'display_name': 'Data science', 'level': 1, 'score': 0.38790023}, {'id': 'https://openalex.org/C154945302', 'wikidata': 'https://www.wikidata.org/wiki/Q11660', 'display_name': 'Artificial intelligence', 'level': 1, 'score': 0.18226856}, {'id': 'https://openalex.org/C2780378061', 'wikidata': 'https://www.wikidata.org/wiki/Q25351891', 'display_name': 'Service (business)', 'level': 2, 'score': 0.13982725}, {'id': 'https://openalex.org/C33923547', 'wikidata': 'https://www.wikidata.org/wiki/Q395', 'display_name': 'Mathematics', 'level': 0, 'score': 0.11448476}, {'id': 'https://openalex.org/C105795698', 'wikidata': 'https://www.wikidata.org/wiki/Q12483', 'display_name': 'Statistics', 'level': 1, 'score': 0.113663524}, {'id': 'https://openalex.org/C205649164', 'wikidata': 'https://www.wikidata.org/wiki/Q1071', 'display_name': 'Geography', 'level': 0, 'score': 0.10834077}, {'id': 'https://openalex.org/C138885662', 'wikidata': 'https://www.wikidata.org/wiki/Q5891', 'display_name': 'Philosophy', 'level': 0, 'score': 0.0}, {'id': 'https://openalex.org/C136264566', 'wikidata': 'https://www.wikidata.org/wiki/Q159810', 'display_name': 'Economy', 'level': 1, 'score': 0.0}, {'id': 'https://openalex.org/C166957645', 'wikidata': 'https://www.wikidata.org/wiki/Q23498', 'display_name': 'Archaeology', 'level': 1, 'score': 0.0}, {'id': 'https://openalex.org/C2779473830', 'wikidata': 'https://www.wikidata.org/wiki/Q1540899', 'display_name': 'MEDLINE', 'level': 2, 'score': 0.0}, {'id': 'https://openalex.org/C111472728', 'wikidata': 'https://www.wikidata.org/wiki/Q9471', 'display_name': 'Epistemology', 'level': 1, 'score': 0.0}, {'id': 'https://openalex.org/C17744445', 'wikidata': 'https://www.wikidata.org/wiki/Q36442', 'display_name': 'Political science', 'level': 0, 'score': 0.0}, {'id': 'https://openalex.org/C199539241', 'wikidata': 'https://www.wikidata.org/wiki/Q7748', 'display_name': 'Law', 'level': 1, 'score': 0.0}, {'id': 'https://openalex.org/C162324750', 'wikidata': 'https://www.wikidata.org/wiki/Q8134', 'display_name': 'Economics', 'level': 0, 'score': 0.0}, {'id': 'https://openalex.org/C199360897', 'wikidata': 'https://www.wikidata.org/wiki/Q9143', 'display_name': 'Programming language', 'level': 1, 'score': 0.0}], 'mesh': [], 'locations_count': 1, 'locations': [{'is_oa': False, 'landing_page_url': 'https://doi.org/10.1016/j.ins.2017.08.096', 'pdf_url': None, 'source': {'id': 'https://openalex.org/S192650101', 'display_name': 'Information Sciences', 'issn_l': '0020-0255', 'issn': ['0020-0255', '1872-6291'], 'is_oa': False, 'is_in_doaj': False, 'is_core': True, 'host_organization': 'https://openalex.org/P4310320990', 'host_organization_name': 'Elsevier BV', 'host_organization_lineage': ['https://openalex.org/P4310320990'], 'host_organization_lineage_names': ['Elsevier BV'], 'type': 'journal'}, 'license': None, 'license_id': None, 'version': None, 'is_accepted': False, 'is_published': False}], 'best_oa_location': None, 'sustainable_development_goals': [], 'grants': [{'funder': 'https://openalex.org/F4320321001', 'funder_display_name': 'National Natural Science Foundation of China', 'award_id': '71110107027'}, {'funder': 'https://openalex.org/F4320321001', 'funder_display_name': 'National Natural Science Foundation of China', 'award_id': '71490724'}, {'funder': 'https://openalex.org/F4320321001', 'funder_display_name': 'National Natural Science Foundation of China', 'award_id': '71372044'}, {'funder': 'https://openalex.org/F4320321001', 'funder_display_name': 'National Natural Science Foundation of China', 'award_id': '71402007'}], 'datasets': [], 'versions': [], 'referenced_works_count': 64, 'referenced_works': ['https://openalex.org/W1515087027', 'https://openalex.org/W1536516100', 'https://openalex.org/W1557757161', 'https://openalex.org/W1567491469', 'https://openalex.org/W1680189815', 'https://openalex.org/W1687668752', 'https://openalex.org/W1965406039', 'https://openalex.org/W1966503997', 'https://openalex.org/W1970095591', 'https://openalex.org/W1970107420', 'https://openalex.org/W1989900570', 'https://openalex.org/W2002437555', 'https://openalex.org/W2004089651', 'https://openalex.org/W2007807439', 'https://openalex.org/W2037022968', 'https://openalex.org/W2041404167', 'https://openalex.org/W2046122205', 'https://openalex.org/W2067505258', 'https://openalex.org/W2083037024', 'https://openalex.org/W2083305840', 'https://openalex.org/W2089139448', 'https://openalex.org/W2100329159', 'https://openalex.org/W2104456647', 'https://openalex.org/W2105200992', 'https://openalex.org/W2105736812', 'https://openalex.org/W2111313443', 'https://openalex.org/W2114581066', 'https://openalex.org/W2116401198', 'https://openalex.org/W2117827905', 'https://openalex.org/W2121392694', 'https://openalex.org/W2123937625', 'https://openalex.org/W2125247664', 'https://openalex.org/W2136177758', 'https://openalex.org/W2152679188', 'https://openalex.org/W2153066044', 'https://openalex.org/W2158655831', 'https://openalex.org/W2160660844', 'https://openalex.org/W2162788093', 'https://openalex.org/W2197919320', 'https://openalex.org/W2266602631', 'https://openalex.org/W2289957495', 'https://openalex.org/W2398153122', 'https://openalex.org/W2768019803', 'https://openalex.org/W2886166656', 'https://openalex.org/W2912218307', 'https://openalex.org/W2913941020', 'https://openalex.org/W2978725006', 'https://openalex.org/W3123703810', 'https://openalex.org/W3123967386', 'https://openalex.org/W3124392856', 'https://openalex.org/W3124946654', 'https://openalex.org/W3125189097', 'https://openalex.org/W4230574761', 'https://openalex.org/W4238430687', 'https://openalex.org/W4240936076', 'https://openalex.org/W4242959488', 'https://openalex.org/W4244195804', 'https://openalex.org/W4245219133', 'https://openalex.org/W4247119837', 'https://openalex.org/W4250331344', 'https://openalex.org/W4256250826', 'https://openalex.org/W4298359459', 'https://openalex.org/W4300344234', 'https://openalex.org/W69239580'], 'related_works': ['https://openalex.org/W4306248409', 'https://openalex.org/W4211213551', 'https://openalex.org/W3159631231', 'https://openalex.org/W2914559142', 'https://openalex.org/W2882989837', 'https://openalex.org/W2103926897', 'https://openalex.org/W2101250918', 'https://openalex.org/W2094985717', 'https://openalex.org/W2062728131', 'https://openalex.org/W1824075546'], 'abstract_inverted_index': {'While': [0], 'web': [1], 'services': [2], 'avail': [3], 'a': [4, 40, 59, 93, 102, 106, 121], 'rapid': [5], 'growth': [6], 'of': [7, 16, 27, 67, 71, 89, 95], 'data': [8, 133], 'volume': [9], 'for': [10], 'use,': [11], 'identifying': [12], 'helpful': [13], 'information': [14, 52, 115], 'is': [15, 31, 109, 128], 'great': [17], 'value,': [18], 'especially': [19], 'when': [20], 'users': [21, 38], 'face': [22], 'with': [23, 39], 'an': [24], 'unwilling': [25], 'glut': [26], 'information.': [28], 'Thus,': [29], 'it': [30], 'deemed': [32], 'relevant': [33], 'and': [34, 73, 104, 143], 'meaningful': [35], 'to': [36, 113, 138], 'provide': [37], 'representative': [41], 'subset': [42], '(i.e.,': [43, 54], 'small': [44], 'set)': [45], 'that': [46], 'could': [47], 'well': [48], 'reflect': [49], 'the': [50, 65], 'original': [51], 'corpus': [53], 'large': [55], 'set).': [56], 'In': [57], 'such': [58], 'large–small': [60], 'context,': [61], 'this': [62], 'paper': [63], 'addresses': [64], 'issues': [66], 'representativeness': [68, 97], 'in': [69, 119, 140], 'light': [70], 'measurement': [72], 'extraction': [74, 98, 126], 'by': [75, 130], 'reviewing': [76], 'our': [77], 'previous': [78], 'efforts.': [79], 'Specifically,': [80], 'we': [81], 'first': [82], 'discuss': [83], 'various': [84], 'metrics': [85], 'from': [86], 'different': [87], 'perspectives': [88], 'representativeness,': [90], 'then': [91], 'present': [92], 'series': [94], 'related': [96], 'methods.': [99], 'Finally': [100], 'as': [101], 'supplement': [103], 'extension,': [105], 'recent': [107], 'effort': [108], 'introduced,': [110], 'which': [111], 'aims': [112], 'take': [114], 'quality': [116], 'into': [117], 'account': [118], 'deriving': [120], 'ranked': [122], 'subset.': [123], 'The': [124], 'proposed': [125], 'method': [127], 'justified': [129], 'extensive': [131], 'real-world': [132], 'experiments,': [134], 'showing': [135], 'its': [136], 'superiority': [137], 'others': [139], 'both': [141], 'effectiveness': [142], 'efficiency.': [144]}, 'cited_by_api_url': 'https://api.openalex.org/works?filter=cites:W2751367346', 'counts_by_year': [{'year': 2024, 'cited_by_count': 1}, {'year': 2023, 'cited_by_count': 1}, {'year': 2022, 'cited_by_count': 2}, {'year': 2019, 'cited_by_count': 2}], 'updated_date': '2024-09-18T21:54:46.564732', 'created_date': '2017-09-15'}