Get quick answers to your questions about the article from our AI researcher chatbot
{'id': 'https://openalex.org/W4200520333', 'doi': 'https://doi.org/10.3390/v13122426', 'title': 'Semi-Supervised Pipeline for Autonomous Annotation of SARS-CoV-2 Genomes', 'display_name': 'Semi-Supervised Pipeline for Autonomous Annotation of SARS-CoV-2 Genomes', 'publication_year': 2021, 'publication_date': '2021-12-03', 'ids': {'openalex': 'https://openalex.org/W4200520333', 'doi': 'https://doi.org/10.3390/v13122426', 'pmid': 'https://pubmed.ncbi.nlm.nih.gov/34960694'}, 'language': 'en', 'primary_location': {'is_oa': True, 'landing_page_url': 'https://doi.org/10.3390/v13122426', 'pdf_url': 'https://www.mdpi.com/1999-4915/13/12/2426/pdf?version=1638522879', 'source': {'id': 'https://openalex.org/S55776271', 'display_name': 'Viruses', 'issn_l': '1999-4915', 'issn': ['1999-4915'], 'is_oa': True, 'is_in_doaj': True, 'is_core': True, 'host_organization': 'https://openalex.org/P4310310987', 'host_organization_name': 'Multidisciplinary Digital Publishing Institute', 'host_organization_lineage': ['https://openalex.org/P4310310987'], 'host_organization_lineage_names': ['Multidisciplinary Digital Publishing Institute'], 'type': 'journal'}, 'license': 'cc-by', 'license_id': 'https://openalex.org/licenses/cc-by', 'version': 'publishedVersion', 'is_accepted': True, 'is_published': True}, 'type': 'article', 'type_crossref': 'journal-article', 'indexed_in': ['crossref', 'doaj', 'pubmed'], 'open_access': {'is_oa': True, 'oa_status': 'gold', 'oa_url': 'https://www.mdpi.com/1999-4915/13/12/2426/pdf?version=1638522879', 'any_repository_has_fulltext': True}, 'authorships': [{'author_position': 'first', 'author': {'id': 'https://openalex.org/A5073381107', 'display_name': 'Kristen L. Beck', 'orcid': 'https://orcid.org/0000-0002-4603-0235'}, 'institutions': [{'id': 'https://openalex.org/I4210085935', 'display_name': 'IBM Research - Almaden', 'ror': 'https://ror.org/005w8dd04', 'country_code': 'US', 'type': 'facility', 'lineage': ['https://openalex.org/I1341412227', 'https://openalex.org/I4210085935', 'https://openalex.org/I4210114115']}], 'countries': ['US'], 'is_corresponding': True, 'raw_author_name': 'Kristen L. Beck', 'raw_affiliation_strings': ['AI and Cognitive Software, IBM Almaden Research Center, San Jose, CA 95120, USA'], 'affiliations': [{'raw_affiliation_string': 'AI and Cognitive Software, IBM Almaden Research Center, San Jose, CA 95120, USA', 'institution_ids': ['https://openalex.org/I4210085935']}]}, {'author_position': 'middle', 'author': {'id': 'https://openalex.org/A5078676482', 'display_name': 'Ed Seabolt', 'orcid': 'https://orcid.org/0000-0002-2286-0226'}, 'institutions': [{'id': 'https://openalex.org/I4210085935', 'display_name': 'IBM Research - Almaden', 'ror': 'https://ror.org/005w8dd04', 'country_code': 'US', 'type': 'facility', 'lineage': ['https://openalex.org/I1341412227', 'https://openalex.org/I4210085935', 'https://openalex.org/I4210114115']}], 'countries': ['US'], 'is_corresponding': True, 'raw_author_name': 'Edward Seabolt', 'raw_affiliation_strings': ['AI and Cognitive Software, IBM Almaden Research Center, San Jose, CA 95120, USA'], 'affiliations': [{'raw_affiliation_string': 'AI and Cognitive Software, IBM Almaden Research Center, San Jose, CA 95120, USA', 'institution_ids': ['https://openalex.org/I4210085935']}]}, {'author_position': 'middle', 'author': {'id': 'https://openalex.org/A5103153435', 'display_name': 'Akshay Agarwal', 'orcid': 'https://orcid.org/0000-0002-3075-0916'}, 'institutions': [{'id': 'https://openalex.org/I4210085935', 'display_name': 'IBM Research - Almaden', 'ror': 'https://ror.org/005w8dd04', 'country_code': 'US', 'type': 'facility', 'lineage': ['https://openalex.org/I1341412227', 'https://openalex.org/I4210085935', 'https://openalex.org/I4210114115']}], 'countries': ['US'], 'is_corresponding': False, 'raw_author_name': 'Akshay Agarwal', 'raw_affiliation_strings': ['AI and Cognitive Software, IBM Almaden Research Center, San Jose, CA 95120, USA'], 'affiliations': [{'raw_affiliation_string': 'AI and Cognitive Software, IBM Almaden Research Center, San Jose, CA 95120, USA', 'institution_ids': ['https://openalex.org/I4210085935']}]}, {'author_position': 'middle', 'author': {'id': 'https://openalex.org/A5068945846', 'display_name': 'Gowri Nayar', 'orcid': 'https://orcid.org/0000-0001-5819-7115'}, 'institutions': [{'id': 'https://openalex.org/I4210085935', 'display_name': 'IBM Research - Almaden', 'ror': 'https://ror.org/005w8dd04', 'country_code': 'US', 'type': 'facility', 'lineage': ['https://openalex.org/I1341412227', 'https://openalex.org/I4210085935', 'https://openalex.org/I4210114115']}], 'countries': ['US'], 'is_corresponding': False, 'raw_author_name': 'Gowri Nayar', 'raw_affiliation_strings': ['AI and Cognitive Software, IBM Almaden Research Center, San Jose, CA 95120, USA'], 'affiliations': [{'raw_affiliation_string': 'AI and Cognitive Software, IBM Almaden Research Center, San Jose, CA 95120, USA', 'institution_ids': ['https://openalex.org/I4210085935']}]}, {'author_position': 'middle', 'author': {'id': 'https://openalex.org/A5039069155', 'display_name': 'Simone Bianco', 'orcid': 'https://orcid.org/0000-0003-3367-2084'}, 'institutions': [{'id': 'https://openalex.org/I4210085935', 'display_name': 'IBM Research - Almaden', 'ror': 'https://ror.org/005w8dd04', 'country_code': 'US', 'type': 'facility', 'lineage': ['https://openalex.org/I1341412227', 'https://openalex.org/I4210085935', 'https://openalex.org/I4210114115']}], 'countries': ['US'], 'is_corresponding': False, 'raw_author_name': 'Simone Bianco', 'raw_affiliation_strings': ['AI and Cognitive Software, IBM Almaden Research Center, San Jose, CA 95120, USA', 'NSF Center for Cellular Construction, San Francisco, CA 94158, USA'], 'affiliations': [{'raw_affiliation_string': 'AI and Cognitive Software, IBM Almaden Research Center, San Jose, CA 95120, USA', 'institution_ids': ['https://openalex.org/I4210085935']}, {'raw_affiliation_string': 'NSF Center for Cellular Construction, San Francisco, CA 94158, USA', 'institution_ids': []}]}, {'author_position': 'middle', 'author': {'id': 'https://openalex.org/A5064110512', 'display_name': 'Harsha Krishnareddy', 'orcid': 'https://orcid.org/0000-0002-9932-0076'}, 'institutions': [{'id': 'https://openalex.org/I4210085935', 'display_name': 'IBM Research - Almaden', 'ror': 'https://ror.org/005w8dd04', 'country_code': 'US', 'type': 'facility', 'lineage': ['https://openalex.org/I1341412227', 'https://openalex.org/I4210085935', 'https://openalex.org/I4210114115']}], 'countries': ['US'], 'is_corresponding': False, 'raw_author_name': 'Harsha Krishnareddy', 'raw_affiliation_strings': ['AI and Cognitive Software, IBM Almaden Research Center, San Jose, CA 95120, USA'], 'affiliations': [{'raw_affiliation_string': 'AI and Cognitive Software, IBM Almaden Research Center, San Jose, CA 95120, USA', 'institution_ids': ['https://openalex.org/I4210085935']}]}, {'author_position': 'middle', 'author': {'id': 'https://openalex.org/A5041505367', 'display_name': 'Timothy Ngo', 'orcid': 'https://orcid.org/0000-0002-8006-3129'}, 'institutions': [{'id': 'https://openalex.org/I4210085935', 'display_name': 'IBM Research - Almaden', 'ror': 'https://ror.org/005w8dd04', 'country_code': 'US', 'type': 'facility', 'lineage': ['https://openalex.org/I1341412227', 'https://openalex.org/I4210085935', 'https://openalex.org/I4210114115']}], 'countries': ['US'], 'is_corresponding': False, 'raw_author_name': 'Timothy A. Ngo', 'raw_affiliation_strings': ['AI and Cognitive Software, IBM Almaden Research Center, San Jose, CA 95120, USA'], 'affiliations': [{'raw_affiliation_string': 'AI and Cognitive Software, IBM Almaden Research Center, San Jose, CA 95120, USA', 'institution_ids': ['https://openalex.org/I4210085935']}]}, {'author_position': 'middle', 'author': {'id': 'https://openalex.org/A5058949709', 'display_name': 'Mark Kunitomi', 'orcid': 'https://orcid.org/0000-0002-4626-8972'}, 'institutions': [{'id': 'https://openalex.org/I4210085935', 'display_name': 'IBM Research - Almaden', 'ror': 'https://ror.org/005w8dd04', 'country_code': 'US', 'type': 'facility', 'lineage': ['https://openalex.org/I1341412227', 'https://openalex.org/I4210085935', 'https://openalex.org/I4210114115']}], 'countries': ['US'], 'is_corresponding': False, 'raw_author_name': 'Mark Kunitomi', 'raw_affiliation_strings': ['AI and Cognitive Software, IBM Almaden Research Center, San Jose, CA 95120, USA'], 'affiliations': [{'raw_affiliation_string': 'AI and Cognitive Software, IBM Almaden Research Center, San Jose, CA 95120, USA', 'institution_ids': ['https://openalex.org/I4210085935']}]}, {'author_position': 'middle', 'author': {'id': 'https://openalex.org/A5082917950', 'display_name': 'Vandana Mukherjee', 'orcid': 'https://orcid.org/0000-0002-8189-328X'}, 'institutions': [{'id': 'https://openalex.org/I4210085935', 'display_name': 'IBM Research - Almaden', 'ror': 'https://ror.org/005w8dd04', 'country_code': 'US', 'type': 'facility', 'lineage': ['https://openalex.org/I1341412227', 'https://openalex.org/I4210085935', 'https://openalex.org/I4210114115']}], 'countries': ['US'], 'is_corresponding': False, 'raw_author_name': 'Vandana Mukherjee', 'raw_affiliation_strings': ['AI and Cognitive Software, IBM Almaden Research Center, San Jose, CA 95120, USA'], 'affiliations': [{'raw_affiliation_string': 'AI and Cognitive Software, IBM Almaden Research Center, San Jose, CA 95120, USA', 'institution_ids': ['https://openalex.org/I4210085935']}]}, {'author_position': 'last', 'author': {'id': 'https://openalex.org/A5059270545', 'display_name': 'James H. Kaufman', 'orcid': 'https://orcid.org/0000-0002-7049-7134'}, 'institutions': [{'id': 'https://openalex.org/I4210085935', 'display_name': 'IBM Research - Almaden', 'ror': 'https://ror.org/005w8dd04', 'country_code': 'US', 'type': 'facility', 'lineage': ['https://openalex.org/I1341412227', 'https://openalex.org/I4210085935', 'https://openalex.org/I4210114115']}], 'countries': ['US'], 'is_corresponding': False, 'raw_author_name': 'James H. Kaufman', 'raw_affiliation_strings': ['AI and Cognitive Software, IBM Almaden Research Center, San Jose, CA 95120, USA'], 'affiliations': [{'raw_affiliation_string': 'AI and Cognitive Software, IBM Almaden Research Center, San Jose, CA 95120, USA', 'institution_ids': ['https://openalex.org/I4210085935']}]}], 'institution_assertions': [], 'countries_distinct_count': 1, 'institutions_distinct_count': 1, 'corresponding_author_ids': ['https://openalex.org/A5073381107', 'https://openalex.org/A5078676482'], 'corresponding_institution_ids': ['https://openalex.org/I4210085935', 'https://openalex.org/I4210085935'], 'apc_list': {'value': 2600, 'currency': 'CHF', 'value_usd': 2815, 'provenance': 'doaj'}, 'apc_paid': {'value': 2600, 'currency': 'CHF', 'value_usd': 2815, 'provenance': 'doaj'}, 'fwci': 0.478, 'has_fulltext': False, 'cited_by_count': 6, 'citation_normalized_percentile': {'value': 0.638317, 'is_in_top_1_percent': False, 'is_in_top_10_percent': False}, 'cited_by_percentile_year': {'min': 81, 'max': 83}, 'biblio': {'volume': '13', 'issue': '12', 'first_page': '2426', 'last_page': '2426'}, 'is_retracted': False, 'is_paratext': False, 'primary_topic': {'id': 'https://openalex.org/T10015', 'display_name': 'Genomics and Phylogenetic Studies', 'score': 0.9981, 'subfield': {'id': 'https://openalex.org/subfields/1312', 'display_name': 'Molecular Biology'}, 'field': {'id': 'https://openalex.org/fields/13', 'display_name': 'Biochemistry, Genetics and Molecular Biology'}, 'domain': {'id': 'https://openalex.org/domains/1', 'display_name': 'Life Sciences'}}, 'topics': [{'id': 'https://openalex.org/T10015', 'display_name': 'Genomics and Phylogenetic Studies', 'score': 0.9981, 'subfield': {'id': 'https://openalex.org/subfields/1312', 'display_name': 'Molecular Biology'}, 'field': {'id': 'https://openalex.org/fields/13', 'display_name': 'Biochemistry, Genetics and Molecular Biology'}, 'domain': {'id': 'https://openalex.org/domains/1', 'display_name': 'Life Sciences'}}, {'id': 'https://openalex.org/T12254', 'display_name': 'Machine Learning in Bioinformatics', 'score': 0.9977, 'subfield': {'id': 'https://openalex.org/subfields/1312', 'display_name': 'Molecular Biology'}, 'field': {'id': 'https://openalex.org/fields/13', 'display_name': 'Biochemistry, Genetics and Molecular Biology'}, 'domain': {'id': 'https://openalex.org/domains/1', 'display_name': 'Life Sciences'}}, {'id': 'https://openalex.org/T11048', 'display_name': 'Bacteriophages and microbial interactions', 'score': 0.9954, 'subfield': {'id': 'https://openalex.org/subfields/2303', 'display_name': 'Ecology'}, 'field': {'id': 'https://openalex.org/fields/23', 'display_name': 'Environmental Science'}, 'domain': {'id': 'https://openalex.org/domains/3', 'display_name': 'Physical Sciences'}}], 'keywords': [{'id': 'https://openalex.org/keywords/proteome', 'display_name': 'Proteome', 'score': 0.53446823}, {'id': 'https://openalex.org/keywords/gene-annotation', 'display_name': 'Gene Annotation', 'score': 0.5302605}, {'id': 'https://openalex.org/keywords/ensembl', 'display_name': 'Ensembl', 'score': 0.43244419}], 'concepts': [{'id': 'https://openalex.org/C2776321320', 'wikidata': 'https://www.wikidata.org/wiki/Q857525', 'display_name': 'Annotation', 'level': 2, 'score': 0.78260946}, {'id': 'https://openalex.org/C141231307', 'wikidata': 'https://www.wikidata.org/wiki/Q7020', 'display_name': 'Genome', 'level': 3, 'score': 0.7730944}, {'id': 'https://openalex.org/C70721500', 'wikidata': 'https://www.wikidata.org/wiki/Q177005', 'display_name': 'Computational biology', 'level': 1, 'score': 0.6878227}, {'id': 'https://openalex.org/C86803240', 'wikidata': 'https://www.wikidata.org/wiki/Q420', 'display_name': 'Biology', 'level': 0, 'score': 0.5619277}, {'id': 'https://openalex.org/C89566754', 'wikidata': 'https://www.wikidata.org/wiki/Q2273828', 'display_name': 'Genome project', 'level': 4, 'score': 0.5390337}, {'id': 'https://openalex.org/C104397665', 'wikidata': 'https://www.wikidata.org/wiki/Q860947', 'display_name': 'Proteome', 'level': 2, 'score': 0.53446823}, {'id': 'https://openalex.org/C2908923196', 'wikidata': 'https://www.wikidata.org/wiki/Q5205742', 'display_name': 'Gene Annotation', 'level': 4, 'score': 0.5302605}, {'id': 'https://openalex.org/C43521106', 'wikidata': 'https://www.wikidata.org/wiki/Q2165493', 'display_name': 'Pipeline (software)', 'level': 2, 'score': 0.5028841}, {'id': 'https://openalex.org/C104317684', 'wikidata': 'https://www.wikidata.org/wiki/Q7187', 'display_name': 'Gene', 'level': 2, 'score': 0.47885394}, {'id': 'https://openalex.org/C141674004', 'wikidata': 'https://www.wikidata.org/wiki/Q1344256', 'display_name': 'Ensembl', 'level': 5, 'score': 0.43244419}, {'id': 'https://openalex.org/C54355233', 'wikidata': 'https://www.wikidata.org/wiki/Q7162', 'display_name': 'Genetics', 'level': 1, 'score': 0.4175985}, {'id': 'https://openalex.org/C144292202', 'wikidata': 'https://www.wikidata.org/wiki/Q898273', 'display_name': 'Protein domain', 'level': 3, 'score': 0.41628674}, {'id': 'https://openalex.org/C189206191', 'wikidata': 'https://www.wikidata.org/wiki/Q222046', 'display_name': 'Genomics', 'level': 4, 'score': 0.3924446}, {'id': 'https://openalex.org/C41008148', 'wikidata': 'https://www.wikidata.org/wiki/Q21198', 'display_name': 'Computer science', 'level': 0, 'score': 0.30644435}, {'id': 'https://openalex.org/C199360897', 'wikidata': 'https://www.wikidata.org/wiki/Q9143', 'display_name': 'Programming language', 'level': 1, 'score': 0.0}], 'mesh': [{'descriptor_ui': 'D000086382', 'descriptor_name': 'COVID-19', 'qualifier_ui': 'Q000821', 'qualifier_name': 'virology', 'is_major_topic': True}, {'descriptor_ui': 'D016679', 'descriptor_name': 'Genome, Viral', 'qualifier_ui': '', 'qualifier_name': None, 'is_major_topic': True}, {'descriptor_ui': 'D058977', 'descriptor_name': 'Molecular Sequence Annotation', 'qualifier_ui': '', 'qualifier_name': None, 'is_major_topic': True}, {'descriptor_ui': 'D000086402', 'descriptor_name': 'SARS-CoV-2', 'qualifier_ui': 'Q000235', 'qualifier_name': 'genetics', 'is_major_topic': True}, {'descriptor_ui': 'D000595', 'descriptor_name': 'Amino Acid Sequence', 'qualifier_ui': '', 'qualifier_name': None, 'is_major_topic': False}, {'descriptor_ui': 'D001483', 'descriptor_name': 'Base Sequence', 'qualifier_ui': '', 'qualifier_name': None, 'is_major_topic': False}, {'descriptor_ui': 'D000086382', 'descriptor_name': 'COVID-19', 'qualifier_ui': '', 'qualifier_name': None, 'is_major_topic': False}, {'descriptor_ui': 'D019295', 'descriptor_name': 'Computational Biology', 'qualifier_ui': '', 'qualifier_name': None, 'is_major_topic': False}, {'descriptor_ui': 'D006801', 'descriptor_name': 'Humans', 'qualifier_ui': '', 'qualifier_name': None, 'is_major_topic': False}, {'descriptor_ui': 'D009154', 'descriptor_name': 'Mutation', 'qualifier_ui': '', 'qualifier_name': None, 'is_major_topic': False}, {'descriptor_ui': 'D011485', 'descriptor_name': 'Protein Binding', 'qualifier_ui': '', 'qualifier_name': None, 'is_major_topic': False}, {'descriptor_ui': 'D000072417', 'descriptor_name': 'Protein Domains', 'qualifier_ui': '', 'qualifier_name': None, 'is_major_topic': False}, {'descriptor_ui': 'D000086402', 'descriptor_name': 'SARS-CoV-2', 'qualifier_ui': '', 'qualifier_name': None, 'is_major_topic': False}, {'descriptor_ui': 'D064370', 'descriptor_name': 'Spike Glycoprotein, Coronavirus', 'qualifier_ui': '', 'qualifier_name': None, 'is_major_topic': False}, {'descriptor_ui': 'D064370', 'descriptor_name': 'Spike Glycoprotein, Coronavirus', 'qualifier_ui': 'Q000235', 'qualifier_name': 'genetics', 'is_major_topic': False}], 'locations_count': 4, 'locations': [{'is_oa': True, 'landing_page_url': 'https://doi.org/10.3390/v13122426', 'pdf_url': 'https://www.mdpi.com/1999-4915/13/12/2426/pdf?version=1638522879', 'source': {'id': 'https://openalex.org/S55776271', 'display_name': 'Viruses', 'issn_l': '1999-4915', 'issn': ['1999-4915'], 'is_oa': True, 'is_in_doaj': True, 'is_core': True, 'host_organization': 'https://openalex.org/P4310310987', 'host_organization_name': 'Multidisciplinary Digital Publishing Institute', 'host_organization_lineage': ['https://openalex.org/P4310310987'], 'host_organization_lineage_names': ['Multidisciplinary Digital Publishing Institute'], 'type': 'journal'}, 'license': 'cc-by', 'license_id': 'https://openalex.org/licenses/cc-by', 'version': 'publishedVersion', 'is_accepted': True, 'is_published': True}, {'is_oa': False, 'landing_page_url': 'https://doaj.org/article/88041c92a4e64ff68aa635a2d5e4ac10', 'pdf_url': None, 'source': {'id': 'https://openalex.org/S4306401280', 'display_name': 'DOAJ (DOAJ: Directory of Open Access Journals)', 'issn_l': None, 'issn': None, 'is_oa': True, 'is_in_doaj': False, 'is_core': False, 'host_organization': None, 'host_organization_name': None, 'host_organization_lineage': [], 'host_organization_lineage_names': [], 'type': 'repository'}, 'license': None, 'license_id': None, 'version': None, 'is_accepted': False, 'is_published': False}, {'is_oa': True, 'landing_page_url': 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8706859', 'pdf_url': None, 'source': {'id': 'https://openalex.org/S2764455111', 'display_name': 'PubMed Central', 'issn_l': None, 'issn': None, 'is_oa': True, 'is_in_doaj': False, 'is_core': False, 'host_organization': 'https://openalex.org/I1299303238', 'host_organization_name': 'National Institutes of Health', 'host_organization_lineage': ['https://openalex.org/I1299303238'], 'host_organization_lineage_names': ['National Institutes of Health'], 'type': 'repository'}, 'license': None, 'license_id': None, 'version': 'publishedVersion', 'is_accepted': True, 'is_published': True}, {'is_oa': False, 'landing_page_url': 'https://pubmed.ncbi.nlm.nih.gov/34960694', 'pdf_url': None, 'source': {'id': 'https://openalex.org/S4306525036', 'display_name': 'PubMed', 'issn_l': None, 'issn': None, 'is_oa': False, 'is_in_doaj': False, 'is_core': False, 'host_organization': 'https://openalex.org/I1299303238', 'host_organization_name': 'National Institutes of Health', 'host_organization_lineage': ['https://openalex.org/I1299303238'], 'host_organization_lineage_names': ['National Institutes of Health'], 'type': 'repository'}, 'license': None, 'license_id': None, 'version': None, 'is_accepted': False, 'is_published': False}], 'best_oa_location': {'is_oa': True, 'landing_page_url': 'https://doi.org/10.3390/v13122426', 'pdf_url': 'https://www.mdpi.com/1999-4915/13/12/2426/pdf?version=1638522879', 'source': {'id': 'https://openalex.org/S55776271', 'display_name': 'Viruses', 'issn_l': '1999-4915', 'issn': ['1999-4915'], 'is_oa': True, 'is_in_doaj': True, 'is_core': True, 'host_organization': 'https://openalex.org/P4310310987', 'host_organization_name': 'Multidisciplinary Digital Publishing Institute', 'host_organization_lineage': ['https://openalex.org/P4310310987'], 'host_organization_lineage_names': ['Multidisciplinary Digital Publishing Institute'], 'type': 'journal'}, 'license': 'cc-by', 'license_id': 'https://openalex.org/licenses/cc-by', 'version': 'publishedVersion', 'is_accepted': True, 'is_published': True}, 'sustainable_development_goals': [{'id': 'https://metadata.un.org/sdg/3', 'display_name': 'Good health and well-being', 'score': 0.83}], 'grants': [], 'datasets': [], 'versions': [], 'referenced_works_count': 34, 'referenced_works': ['https://openalex.org/W1547142901', 'https://openalex.org/W2055043387', 'https://openalex.org/W2067127476', 'https://openalex.org/W2074104070', 'https://openalex.org/W2096093282', 'https://openalex.org/W2101291993', 'https://openalex.org/W2127774996', 'https://openalex.org/W2133070552', 'https://openalex.org/W2170551349', 'https://openalex.org/W2323608404', 'https://openalex.org/W2587970647', 'https://openalex.org/W2951216011', 'https://openalex.org/W2952045272', 'https://openalex.org/W3003217347', 'https://openalex.org/W3013630838', 'https://openalex.org/W3015464552', 'https://openalex.org/W3016441544', 'https://openalex.org/W3026051701', 'https://openalex.org/W3033543779', 'https://openalex.org/W3036482641', 'https://openalex.org/W3048714202', 'https://openalex.org/W3083437130', 'https://openalex.org/W3083866604', 'https://openalex.org/W3084261392', 'https://openalex.org/W3085088149', 'https://openalex.org/W3087491011', 'https://openalex.org/W3092086688', 'https://openalex.org/W3093768302', 'https://openalex.org/W3096967708', 'https://openalex.org/W3117140450', 'https://openalex.org/W3126116861', 'https://openalex.org/W3127401064', 'https://openalex.org/W4210702584', 'https://openalex.org/W4212865600'], 'related_works': ['https://openalex.org/W4309908782', 'https://openalex.org/W4220714503', 'https://openalex.org/W3111061871', 'https://openalex.org/W2965072475', 'https://openalex.org/W2165640100', 'https://openalex.org/W2146188580', 'https://openalex.org/W2146014453', 'https://openalex.org/W2096465161', 'https://openalex.org/W1984111139', 'https://openalex.org/W1814949048'], 'abstract_inverted_index': {'SARS-CoV-2': [0, 22, 60, 94, 296], 'genomic': [1, 80], 'sequencing': [2], 'efforts': [3], 'have': [4], 'scaled': [5], 'dramatically': [6], 'to': [7, 126, 139, 214, 291, 302], 'address': [8], 'the': [9, 69, 101, 108, 199, 225, 279, 288], 'current': [10], 'global': [11], 'pandemic': [12], 'and': [13, 25, 34, 55, 76, 106, 119, 147, 153, 165, 171, 173, 190, 194, 216, 227, 245, 281], 'aid': [14], 'public': [15], 'health.': [16], 'However,': [17], 'autonomous': [18], 'genome': [19, 75, 95, 201], 'annotation': [20, 58], 'of': [21, 59, 71, 92, 111, 229, 243, 248, 278], 'genes,': [23], 'proteins,': [24], 'domains': [26], 'is': [27], 'not': [28, 66], 'readily': [29], 'accomplished': [30], 'by': [31, 65, 77], 'existing': [32], 'methods': [33], 'results': [35], 'in': [36, 122, 156, 198, 262], 'missing': [37], 'or': [38], 'incorrect': [39], 'sequences.': [40], 'To': [41], 'overcome': [42], 'this': [43, 188, 252], 'limitation,': [44], 'we': [45, 149, 207, 254], 'developed': [46], 'a': [47, 72, 151, 298], 'novel': [48], 'semi-supervised': [49], 'pipeline': [50], 'for': [51, 295], 'automated': [52], 'gene,': [53, 163], 'protein,': [54, 164], 'functional': [56], 'domain': [57, 166, 220, 282], 'genomes': [61, 238], 'that': [62, 82], 'differentiates': [63], 'itself': [64], 'relying': [67], 'on': [68, 185, 232], 'use': [70], 'single': [73], 'reference': [74], 'overcoming': [78], 'atypical': [79], 'traits': [81], 'challenge': [83], 'traditional': [84], 'bioinformatic': [85], 'methods.': [86], 'We': [87, 178, 222], 'analyzed': [88], 'an': [89, 233], 'initial': [90, 200], 'corpus': [91, 189], '66,000': [93], 'sequences': [96, 182, 266], 'collected': [97], 'from': [98], 'labs': [99], 'across': [100, 169], 'world': [102], 'using': [103], 'our': [104, 230, 263], 'method': [105, 160, 231, 301], 'identified': [107, 256], 'comprehensive': [109], 'set': [110, 116], 'known': [112], 'proteins': [113], 'with': [114, 267, 297], '98.5%': [115], 'membership': [117], 'accuracy': [118, 121, 271, 277], '99.1%': [120], 'length': [123], 'prediction,': [124], 'compared': [125], 'proteome': [127], 'references,': [128], 'including': [129], 'Replicase': [130], 'polyprotein': [131], '1ab': [132], '(with': [133], 'its': [134], 'transcriptional': [135], 'slippage': [136], 'site).': [137], 'Compared': [138], 'other': [140], 'published': [141], 'tools,': [142], 'such': [143], 'as': [144, 247, 272, 274, 307], 'Prokka': [145], '(base)': [146], 'VAPiD,': [148], 'yielded': [150], '6.4-': [152], '1.8-fold': [154], 'increase': [155], 'protein': [157, 184, 265, 280], 'annotations.': [158, 283], 'Our': [159], 'generated': [161], '13,000,000': [162], 'sequences-some': [167], 'conserved': [168], 'time': [170], 'geography': [172], 'others': [174], 'representing': [175], 'emerging': [176], 'variants.': [177, 221], 'observed': [179], '3362': [180], 'non-redundant': [181], 'per': [183], 'average': [186], 'within': [187], 'described': [191], 'key': [192], 'D614G': [193], 'N501Y': [195], 'variants': [196, 242], 'spatiotemporally': [197], 'corpus.': [202], 'For': [203], 'spike': [204, 259], 'glycoprotein': [205, 260], 'domains,': [206], 'achieved': [208], 'greater': [209, 268], 'than': [210, 269], '97.9%': [211], 'sequence': [212], 'identity': [213], 'references': [215], 'characterized': [217], 'receptor': [218], 'binding': [219], 'further': [223], 'demonstrated': [224], 'robustness': [226], 'extensibility': [228], 'additional': [234], '4000': [235], 'variant': [236], 'diverse': [237], 'containing': [239], 'all': [240, 257], 'named': [241], 'concern': [244], 'interest': [246], 'August': [249], '2021.': [250], 'In': [251], 'cohort,': [253], 'successfully': [255], 'keystone': [258], 'mutations': [261], 'predicted': [264], '99%': [270], 'well': [273], 'demonstrating': [275], 'high': [276], 'This': [284], 'work': [285], 'comprehensively': [286], 'presents': [287], 'molecular': [289], 'targets': [290], 'refine': [292], 'biomedical': [293], 'interventions': [294], 'scalable,': [299], 'high-accuracy': [300], 'analyze': [303], 'newly': [304], 'sequenced': [305], 'infections': [306], 'they': [308], 'arise.': [309]}, 'cited_by_api_url': 'https://api.openalex.org/works?filter=cites:W4200520333', 'counts_by_year': [{'year': 2024, 'cited_by_count': 1}, {'year': 2023, 'cited_by_count': 1}, {'year': 2022, 'cited_by_count': 4}], 'updated_date': '2024-12-10T09:43:33.406116', 'created_date': '2021-12-31'}