Get quick answers to your questions about the article from our AI researcher chatbot
{'id': 'https://openalex.org/W1977090527', 'doi': 'https://doi.org/10.1002/prot.23049', 'title': 'Internal organization of large protein families: Relationship between the sequence, structure, and function‐based clustering', 'display_name': 'Internal organization of large protein families: Relationship between the sequence, structure, and function‐based clustering', 'publication_year': 2011, 'publication_date': '2011-05-31', 'ids': {'openalex': 'https://openalex.org/W1977090527', 'doi': 'https://doi.org/10.1002/prot.23049', 'mag': '1977090527', 'pmid': 'https://pubmed.ncbi.nlm.nih.gov/21671455', 'pmcid': 'https://www.ncbi.nlm.nih.gov/pmc/articles/3132221'}, 'language': 'en', 'primary_location': {'is_oa': False, 'landing_page_url': 'https://doi.org/10.1002/prot.23049', 'pdf_url': None, 'source': {'id': 'https://openalex.org/S121161810', 'display_name': 'Proteins Structure Function and Bioinformatics', 'issn_l': '0887-3585', 'issn': ['0887-3585', '1097-0134'], 'is_oa': False, 'is_in_doaj': False, 'is_core': True, 'host_organization': 'https://openalex.org/P4310320595', 'host_organization_name': 'Wiley', 'host_organization_lineage': ['https://openalex.org/P4310320595'], 'host_organization_lineage_names': ['Wiley'], 'type': 'journal'}, 'license': None, 'license_id': None, 'version': None, 'is_accepted': False, 'is_published': False}, 'type': 'article', 'type_crossref': 'journal-article', 'indexed_in': ['crossref', 'pubmed'], 'open_access': {'is_oa': True, 'oa_status': 'green', 'oa_url': 'https://europepmc.org/articles/pmc3132221?pdf=render', 'any_repository_has_fulltext': True}, 'authorships': [{'author_position': 'first', 'author': {'id': 'https://openalex.org/A5101927292', 'display_name': 'Xiaohui Cai', 'orcid': 'https://orcid.org/0000-0001-6343-1017'}, 'institutions': [{'id': 'https://openalex.org/I36258959', 'display_name': 'University of California, San Diego', 'ror': 'https://ror.org/0168r3w48', 'country_code': 'US', 'type': 'education', 'lineage': ['https://openalex.org/I36258959']}], 'countries': ['US'], 'is_corresponding': False, 'raw_author_name': 'Xiao-Hui Cai', 'raw_affiliation_strings': ['Joint Center for Structural Genomics, Center for Research in Biological Systems, University of California, San Diego, California 92093-0446, USA.'], 'affiliations': [{'raw_affiliation_string': 'Joint Center for Structural Genomics, Center for Research in Biological Systems, University of California, San Diego, California 92093-0446, USA.', 'institution_ids': ['https://openalex.org/I36258959']}]}, {'author_position': 'middle', 'author': {'id': 'https://openalex.org/A5066090493', 'display_name': 'Lukasz Jaroszewski', 'orcid': None}, 'institutions': [{'id': 'https://openalex.org/I188891513', 'display_name': 'Sanford Burnham Prebys Medical Discovery Institute', 'ror': 'https://ror.org/03m1g2s55', 'country_code': 'US', 'type': 'nonprofit', 'lineage': ['https://openalex.org/I188891513']}], 'countries': ['US'], 'is_corresponding': False, 'raw_author_name': 'Lukasz Jaroszewski', 'raw_affiliation_strings': ['Joint Center for Structural Genomics, Bioinformatics Core, Sanford‐Burnham Medical Research Institute, La Jolla, California 92037'], 'affiliations': [{'raw_affiliation_string': 'Joint Center for Structural Genomics, Bioinformatics Core, Sanford‐Burnham Medical Research Institute, La Jolla, California 92037', 'institution_ids': ['https://openalex.org/I188891513']}]}, {'author_position': 'middle', 'author': {'id': 'https://openalex.org/A5054419734', 'display_name': 'John Wooley', 'orcid': None}, 'institutions': [{'id': 'https://openalex.org/I4210091204', 'display_name': 'Joint Center for Structural Genomics', 'ror': 'https://ror.org/00exr1241', 'country_code': 'US', 'type': 'other', 'lineage': ['https://openalex.org/I4210091204']}, {'id': 'https://openalex.org/I36258959', 'display_name': 'University of California, San Diego', 'ror': 'https://ror.org/0168r3w48', 'country_code': 'US', 'type': 'education', 'lineage': ['https://openalex.org/I36258959']}], 'countries': ['US'], 'is_corresponding': False, 'raw_author_name': 'John Wooley', 'raw_affiliation_strings': ['Joint Center for Structural Genomics, Bioinformatics Core, Center for Research in Biological Systems, University of California, San Diego, California 92093-0446'], 'affiliations': [{'raw_affiliation_string': 'Joint Center for Structural Genomics, Bioinformatics Core, Center for Research in Biological Systems, University of California, San Diego, California 92093-0446', 'institution_ids': ['https://openalex.org/I4210091204', 'https://openalex.org/I36258959']}]}, {'author_position': 'last', 'author': {'id': 'https://openalex.org/A5010671148', 'display_name': 'Adam Godzik', 'orcid': 'https://orcid.org/0000-0002-2425-852X'}, 'institutions': [{'id': 'https://openalex.org/I4210091204', 'display_name': 'Joint Center for Structural Genomics', 'ror': 'https://ror.org/00exr1241', 'country_code': 'US', 'type': 'other', 'lineage': ['https://openalex.org/I4210091204']}, {'id': 'https://openalex.org/I36258959', 'display_name': 'University of California, San Diego', 'ror': 'https://ror.org/0168r3w48', 'country_code': 'US', 'type': 'education', 'lineage': ['https://openalex.org/I36258959']}], 'countries': ['US'], 'is_corresponding': False, 'raw_author_name': 'Adam Godzik', 'raw_affiliation_strings': ['Joint Center for Structural Genomics, Bioinformatics Core, Center for Research in Biological Systems, University of California, San Diego, California 92093‐0446'], 'affiliations': [{'raw_affiliation_string': 'Joint Center for Structural Genomics, Bioinformatics Core, Center for Research in Biological Systems, University of California, San Diego, California 92093‐0446', 'institution_ids': ['https://openalex.org/I4210091204', 'https://openalex.org/I36258959']}]}], 'countries_distinct_count': 1, 'institutions_distinct_count': 3, 'corresponding_author_ids': [], 'corresponding_institution_ids': [], 'apc_list': None, 'apc_paid': None, 'fwci': 0.273, 'has_fulltext': True, 'fulltext_origin': 'ngrams', 'cited_by_count': 5, 'citation_normalized_percentile': {'value': 0.593233, 'is_in_top_1_percent': False, 'is_in_top_10_percent': False}, 'cited_by_percentile_year': {'min': 80, 'max': 82}, 'biblio': {'volume': '79', 'issue': '8', 'first_page': '2389', 'last_page': '2402'}, 'is_retracted': False, 'is_paratext': False, 'primary_topic': {'id': 'https://openalex.org/T10044', 'display_name': 'Protein Structure Prediction and Analysis', 'score': 0.9982, 'subfield': {'id': 'https://openalex.org/subfields/1312', 'display_name': 'Molecular Biology'}, 'field': {'id': 'https://openalex.org/fields/13', 'display_name': 'Biochemistry, Genetics and Molecular Biology'}, 'domain': {'id': 'https://openalex.org/domains/1', 'display_name': 'Life Sciences'}}, 'topics': [{'id': 'https://openalex.org/T10044', 'display_name': 'Protein Structure Prediction and Analysis', 'score': 0.9982, 'subfield': {'id': 'https://openalex.org/subfields/1312', 'display_name': 'Molecular Biology'}, 'field': {'id': 'https://openalex.org/fields/13', 'display_name': 'Biochemistry, Genetics and Molecular Biology'}, 'domain': {'id': 'https://openalex.org/domains/1', 'display_name': 'Life Sciences'}}, {'id': 'https://openalex.org/T10015', 'display_name': 'RNA Sequencing Data Analysis', 'score': 0.9976, 'subfield': {'id': 'https://openalex.org/subfields/1312', 'display_name': 'Molecular Biology'}, 'field': {'id': 'https://openalex.org/fields/13', 'display_name': 'Biochemistry, Genetics and Molecular Biology'}, 'domain': {'id': 'https://openalex.org/domains/1', 'display_name': 'Life Sciences'}}, {'id': 'https://openalex.org/T11162', 'display_name': 'Macromolecular Crystallography Techniques', 'score': 0.9952, 'subfield': {'id': 'https://openalex.org/subfields/2505', 'display_name': 'Materials Chemistry'}, 'field': {'id': 'https://openalex.org/fields/25', 'display_name': 'Materials Science'}, 'domain': {'id': 'https://openalex.org/domains/3', 'display_name': 'Physical Sciences'}}], 'keywords': [{'id': 'https://openalex.org/keywords/sequence', 'display_name': 'Sequence (biology)', 'score': 0.5997017}, {'id': 'https://openalex.org/keywords/structures', 'display_name': 'Structures', 'score': 0.540752}, {'id': 'https://openalex.org/keywords/enzyme-structure', 'display_name': 'Enzyme Structure', 'score': 0.538495}, {'id': 'https://openalex.org/keywords/proteins', 'display_name': 'Proteins', 'score': 0.527507}, {'id': 'https://openalex.org/keywords/similarity', 'display_name': 'Similarity (geometry)', 'score': 0.50645006}, {'id': 'https://openalex.org/keywords/sequence-alignment', 'display_name': 'sequence alignment', 'score': 0.504967}, {'id': 'https://openalex.org/keywords/functional-genomics', 'display_name': 'Functional Genomics', 'score': 0.500768}, {'id': 'https://openalex.org/keywords/protein-family', 'display_name': 'Protein family', 'score': 0.47911882}, {'id': 'https://openalex.org/keywords/protein-structure-database', 'display_name': 'Protein structure database', 'score': 0.47634462}, {'id': 'https://openalex.org/keywords/protein-function-prediction', 'display_name': 'Protein function prediction', 'score': 0.41297236}], 'concepts': [{'id': 'https://openalex.org/C2778112365', 'wikidata': 'https://www.wikidata.org/wiki/Q3511065', 'display_name': 'Sequence (biology)', 'level': 2, 'score': 0.5997017}, {'id': 'https://openalex.org/C70721500', 'wikidata': 'https://www.wikidata.org/wiki/Q177005', 'display_name': 'Computational biology', 'level': 1, 'score': 0.58662134}, {'id': 'https://openalex.org/C73555534', 'wikidata': 'https://www.wikidata.org/wiki/Q622825', 'display_name': 'Cluster analysis', 'level': 2, 'score': 0.57002187}, {'id': 'https://openalex.org/C103278499', 'wikidata': 'https://www.wikidata.org/wiki/Q254465', 'display_name': 'Similarity (geometry)', 'level': 3, 'score': 0.50645006}, {'id': 'https://openalex.org/C14036430', 'wikidata': 'https://www.wikidata.org/wiki/Q3736076', 'display_name': 'Function (biology)', 'level': 2, 'score': 0.49419406}, {'id': 'https://openalex.org/C171897839', 'wikidata': 'https://www.wikidata.org/wiki/Q417841', 'display_name': 'Protein family', 'level': 3, 'score': 0.47911882}, {'id': 'https://openalex.org/C136475424', 'wikidata': 'https://www.wikidata.org/wiki/Q7251500', 'display_name': 'Protein structure database', 'level': 4, 'score': 0.47634462}, {'id': 'https://openalex.org/C58773245', 'wikidata': 'https://www.wikidata.org/wiki/Q4832556', 'display_name': 'Structural Classification of Proteins database', 'level': 3, 'score': 0.47429588}, {'id': 'https://openalex.org/C47701112', 'wikidata': 'https://www.wikidata.org/wiki/Q735188', 'display_name': 'Protein structure', 'level': 2, 'score': 0.46915662}, {'id': 'https://openalex.org/C86803240', 'wikidata': 'https://www.wikidata.org/wiki/Q420', 'display_name': 'Biology', 'level': 0, 'score': 0.46524632}, {'id': 'https://openalex.org/C164866538', 'wikidata': 'https://www.wikidata.org/wiki/Q367351', 'display_name': 'Cluster (spacecraft)', 'level': 2, 'score': 0.42671558}, {'id': 'https://openalex.org/C207060522', 'wikidata': 'https://www.wikidata.org/wiki/Q7251473', 'display_name': 'Protein function prediction', 'level': 4, 'score': 0.41297236}, {'id': 'https://openalex.org/C78458016', 'wikidata': 'https://www.wikidata.org/wiki/Q840400', 'display_name': 'Evolutionary biology', 'level': 1, 'score': 0.38670805}, {'id': 'https://openalex.org/C54355233', 'wikidata': 'https://www.wikidata.org/wiki/Q7162', 'display_name': 'Genetics', 'level': 1, 'score': 0.32078058}, {'id': 'https://openalex.org/C41008148', 'wikidata': 'https://www.wikidata.org/wiki/Q21198', 'display_name': 'Computer science', 'level': 0, 'score': 0.28910977}, {'id': 'https://openalex.org/C2986374874', 'wikidata': 'https://www.wikidata.org/wiki/Q8054', 'display_name': 'Protein function', 'level': 3, 'score': 0.282838}, {'id': 'https://openalex.org/C154945302', 'wikidata': 'https://www.wikidata.org/wiki/Q11660', 'display_name': 'Artificial intelligence', 'level': 1, 'score': 0.15836877}, {'id': 'https://openalex.org/C104317684', 'wikidata': 'https://www.wikidata.org/wiki/Q7187', 'display_name': 'Gene', 'level': 2, 'score': 0.08184159}, {'id': 'https://openalex.org/C41584329', 'wikidata': 'https://www.wikidata.org/wiki/Q175902', 'display_name': 'Sequence database', 'level': 3, 'score': 0.07722014}, {'id': 'https://openalex.org/C55493867', 'wikidata': 'https://www.wikidata.org/wiki/Q7094', 'display_name': 'Biochemistry', 'level': 1, 'score': 0.0}, {'id': 'https://openalex.org/C115961682', 'wikidata': 'https://www.wikidata.org/wiki/Q860623', 'display_name': 'Image (mathematics)', 'level': 2, 'score': 0.0}, {'id': 'https://openalex.org/C199360897', 'wikidata': 'https://www.wikidata.org/wiki/Q9143', 'display_name': 'Programming language', 'level': 1, 'score': 0.0}], 'mesh': [{'descriptor_ui': 'D011506', 'descriptor_name': 'Proteins', 'qualifier_ui': 'Q000737', 'qualifier_name': 'chemistry', 'is_major_topic': True}, {'descriptor_ui': 'D016000', 'descriptor_name': 'Cluster Analysis', 'qualifier_ui': '', 'qualifier_name': None, 'is_major_topic': False}, {'descriptor_ui': 'D030562', 'descriptor_name': 'Databases, Protein', 'qualifier_ui': '', 'qualifier_name': None, 'is_major_topic': False}, {'descriptor_ui': 'D011506', 'descriptor_name': 'Proteins', 'qualifier_ui': '', 'qualifier_name': None, 'is_major_topic': False}], 'locations_count': 4, 'locations': [{'is_oa': False, 'landing_page_url': 'https://doi.org/10.1002/prot.23049', 'pdf_url': None, 'source': {'id': 'https://openalex.org/S121161810', 'display_name': 'Proteins Structure Function and Bioinformatics', 'issn_l': '0887-3585', 'issn': ['0887-3585', '1097-0134'], 'is_oa': False, 'is_in_doaj': False, 'is_core': True, 'host_organization': 'https://openalex.org/P4310320595', 'host_organization_name': 'Wiley', 'host_organization_lineage': ['https://openalex.org/P4310320595'], 'host_organization_lineage_names': ['Wiley'], 'type': 'journal'}, 'license': None, 'license_id': None, 'version': None, 'is_accepted': False, 'is_published': False}, {'is_oa': True, 'landing_page_url': 'https://europepmc.org/articles/pmc3132221', 'pdf_url': 'https://europepmc.org/articles/pmc3132221?pdf=render', 'source': {'id': 'https://openalex.org/S4306400806', 'display_name': 'Europe PMC (PubMed Central)', 'issn_l': None, 'issn': None, 'is_oa': True, 'is_in_doaj': False, 'is_core': False, 'host_organization': 'https://openalex.org/I1303153112', 'host_organization_name': 'European Bioinformatics Institute', 'host_organization_lineage': ['https://openalex.org/I1303153112'], 'host_organization_lineage_names': ['European Bioinformatics Institute'], 'type': 'repository'}, 'license': None, 'license_id': None, 'version': 'acceptedVersion', 'is_accepted': True, 'is_published': False}, {'is_oa': True, 'landing_page_url': 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3132221', 'pdf_url': None, 'source': {'id': 'https://openalex.org/S2764455111', 'display_name': 'PubMed Central', 'issn_l': None, 'issn': None, 'is_oa': True, 'is_in_doaj': False, 'is_core': False, 'host_organization': 'https://openalex.org/I1299303238', 'host_organization_name': 'National Institutes of Health', 'host_organization_lineage': ['https://openalex.org/I1299303238'], 'host_organization_lineage_names': ['National Institutes of Health'], 'type': 'repository'}, 'license': None, 'license_id': None, 'version': 'acceptedVersion', 'is_accepted': True, 'is_published': False}, {'is_oa': False, 'landing_page_url': 'https://pubmed.ncbi.nlm.nih.gov/21671455', 'pdf_url': None, 'source': {'id': 'https://openalex.org/S4306525036', 'display_name': 'PubMed', 'issn_l': None, 'issn': None, 'is_oa': False, 'is_in_doaj': False, 'is_core': False, 'host_organization': 'https://openalex.org/I1299303238', 'host_organization_name': 'National Institutes of Health', 'host_organization_lineage': ['https://openalex.org/I1299303238'], 'host_organization_lineage_names': ['National Institutes of Health'], 'type': 'repository'}, 'license': None, 'license_id': None, 'version': None, 'is_accepted': False, 'is_published': False}], 'best_oa_location': {'is_oa': True, 'landing_page_url': 'https://europepmc.org/articles/pmc3132221', 'pdf_url': 'https://europepmc.org/articles/pmc3132221?pdf=render', 'source': {'id': 'https://openalex.org/S4306400806', 'display_name': 'Europe PMC (PubMed Central)', 'issn_l': None, 'issn': None, 'is_oa': True, 'is_in_doaj': False, 'is_core': False, 'host_organization': 'https://openalex.org/I1303153112', 'host_organization_name': 'European Bioinformatics Institute', 'host_organization_lineage': ['https://openalex.org/I1303153112'], 'host_organization_lineage_names': ['European Bioinformatics Institute'], 'type': 'repository'}, 'license': None, 'license_id': None, 'version': 'acceptedVersion', 'is_accepted': True, 'is_published': False}, 'sustainable_development_goals': [], 'grants': [], 'datasets': [], 'versions': [], 'referenced_works_count': 37, 'referenced_works': ['https://openalex.org/W1582255757', 'https://openalex.org/W1791999417', 'https://openalex.org/W1964903151', 'https://openalex.org/W1991910500', 'https://openalex.org/W2030776726', 'https://openalex.org/W2031239680', 'https://openalex.org/W2040548339', 'https://openalex.org/W2045564656', 'https://openalex.org/W2051019453', 'https://openalex.org/W2055043387', 'https://openalex.org/W2057447180', 'https://openalex.org/W2060446072', 'https://openalex.org/W2073338313', 'https://openalex.org/W2079093876', 'https://openalex.org/W2086130370', 'https://openalex.org/W2087254300', 'https://openalex.org/W2088638514', 'https://openalex.org/W2089085945', 'https://openalex.org/W2089999388', 'https://openalex.org/W2097642323', 'https://openalex.org/W2099075703', 'https://openalex.org/W2108067237', 'https://openalex.org/W2114999652', 'https://openalex.org/W2124871329', 'https://openalex.org/W2128653811', 'https://openalex.org/W2141885858', 'https://openalex.org/W2146134187', 'https://openalex.org/W2147667050', 'https://openalex.org/W2151631782', 'https://openalex.org/W2153018384', 'https://openalex.org/W2156125289', 'https://openalex.org/W2158714788', 'https://openalex.org/W2163895365', 'https://openalex.org/W2169097687', 'https://openalex.org/W2170672962', 'https://openalex.org/W4210623056', 'https://openalex.org/W95014119'], 'related_works': ['https://openalex.org/W853003716', 'https://openalex.org/W4235848672', 'https://openalex.org/W2800194647', 'https://openalex.org/W2593264178', 'https://openalex.org/W2183523499', 'https://openalex.org/W2155238244', 'https://openalex.org/W1989756167', 'https://openalex.org/W1535721329', 'https://openalex.org/W1514151181', 'https://openalex.org/W1504467230'], 'abstract_inverted_index': {'Abstract': [0], 'The': [1, 183], 'protein': [2, 79, 91, 156, 168], 'universe': [3], 'can': [4, 187], 'be': [5, 62, 173], 'organized': [6], 'in': [7, 46, 85, 147, 155], 'families': [8, 16, 92], 'that': [9, 110, 170, 185], 'group': [10], 'proteins': [11, 68, 130], 'sharing': [12], 'common': [13], 'ancestry.': [14], 'Such': [15], 'display': [17], 'variable': [18], 'levels': [19], 'of': [20, 55, 67, 97, 129, 150, 167, 180, 203], 'structural': [21, 151, 178], 'and': [22, 35, 48, 57, 93, 114, 152], 'functional': [23, 153], 'divergence,': [24], 'from': [25, 205], 'homogenous': [26], 'families,': [27, 42], 'where': [28, 43], 'all': [29], 'members': [30], 'have': [31, 171], 'the': [32, 137, 165], 'same': [33], 'function': [34, 47, 58], 'very': [36, 40], 'similar': [37, 71, 132], 'structure,': [38], 'to': [39, 64, 88, 172, 175], 'divergent': [41, 78], 'large': [44, 90], 'variations': [45], 'structure': [49, 56], 'are': [50], 'observed.': [51], 'For': [52], 'practical': [53], 'purposes': [54], 'prediction,': [59], 'it': [60], 'would': [61], 'beneficial': [63], 'identify': [65, 102], 'sub‐groups': [66], 'with': [69, 131, 143], 'highly': [70], 'structures': [72, 126, 169], '(iso‐structural)': [73], 'and/or': [74], 'functions': [75], '(iso‐functional)': [76], 'within': [77], 'families.': [80, 157, 182], 'We': [81, 108], 'compared': [82], 'three': [83], 'algorithms': [84], 'their': [86], 'ability': [87], 'cluster': [89], 'discuss': [94], 'whether': [95], 'any': [96], 'these': [98, 161], 'methods': [99, 117, 142, 162], 'could': [100], 'reliably': [101], 'such': [103, 181], 'iso‐structural': [104], 'or': [105, 127], 'iso‐functional': [106], 'groups.': [107], 'show': [109], 'clustering': [111], 'using': [112], 'profile‐sequence': [113], 'profile–profile': [115], 'comparison': [116], 'closely': [118], 'reproduces': [119], 'clusters': [120, 128], 'based': [121, 191], 'on': [122, 192], 'similarities': [123], 'between': [124], '3D': [125], 'biological': [133], 'functions.': [134], 'In': [135], 'contrast,': [136], 'still': [138], 'commonly': [139], 'used': [140], 'sequence‐based': [141], 'fixed': [144], 'thresholds': [145], 'result': [146], 'vast': [148], 'overestimates': [149], 'diversity': [154], 'As': [158], 'a': [159], 'result,': [160], 'also': [163], 'overestimate': [164], 'number': [166], 'determined': [174], 'fully': [176], 'characterize': [177], 'space': [179], 'fact': [184], 'one': [186], 'build': [188], 'reliable': [189], 'models': [190], 'apparently': [193], 'distantly': [194], 'related': [195], 'templates': [196], 'is': [197], 'crucial': [198], 'for': [199], 'extracting': [200], 'maximal': [201], 'amount': [202], 'information': [204], 'new': [206], 'sequencing': [207], 'projects.': [208], 'Proteins': [209], '2011;': [210], '©': [211], '2011': [212], 'Wiley‐Liss,': [213], 'Inc.': [214]}, 'cited_by_api_url': 'https://api.openalex.org/works?filter=cites:W1977090527', 'counts_by_year': [{'year': 2024, 'cited_by_count': 1}, {'year': 2023, 'cited_by_count': 1}, {'year': 2021, 'cited_by_count': 1}, {'year': 2014, 'cited_by_count': 1}, {'year': 2012, 'cited_by_count': 1}], 'updated_date': '2024-08-15T11:59:06.115947', 'created_date': '2016-06-24'}