Acoustic Modeling Using Deep Belief Networks

Abdelrahman Mohamed; George E. Dahl; Geoffrey E. Hinton
{'id': 'https://openalex.org/W1993882792', 'doi': 'https://doi.org/10.1109/tasl.2011.2109382', 'title': 'Acoustic Modeling Using Deep Belief Networks', 'display_name': 'Acoustic Modeling Using Deep Belief Networks', 'publication_year': 2011, 'publication_date': '2011-01-31', 'ids': {'openalex': 'https://openalex.org/W1993882792', 'doi': 'https://doi.org/10.1109/tasl.2011.2109382', 'mag': '1993882792'}, 'language': 'en', 'primary_location': {'is_oa': False, 'landing_page_url': 'https://doi.org/10.1109/tasl.2011.2109382', 'pdf_url': None, 'source': {'id': 'https://openalex.org/S199497470', 'display_name': 'IEEE Transactions on Audio Speech and Language Processing', 'issn_l': '1558-7916', 'issn': ['1558-7916', '1558-7924'], 'is_oa': False, 'is_in_doaj': False, 'is_core': True, 'host_organization': 'https://openalex.org/P4310319808', 'host_organization_name': 'Institute of Electrical and Electronics Engineers', 'host_organization_lineage': ['https://openalex.org/P4310319808'], 'host_organization_lineage_names': ['Institute of Electrical and Electronics Engineers'], 'type': 'journal'}, 'license': None, 'license_id': None, 'version': None, 'is_accepted': False, 'is_published': False}, 'type': 'article', 'type_crossref': 'journal-article', 'indexed_in': ['crossref'], 'open_access': {'is_oa': False, 'oa_status': 'closed', 'oa_url': None, 'any_repository_has_fulltext': False}, 'authorships': [{'author_position': 'first', 'author': {'id': 'https://openalex.org/A5103742478', 'display_name': 'Abdelrahman Mohamed', 'orcid': None}, 'institutions': [{'id': 'https://openalex.org/I185261750', 'display_name': 'University of Toronto', 'ror': 'https://ror.org/03dbr7087', 'country_code': 'CA', 'type': 'education', 'lineage': ['https://openalex.org/I185261750']}], 'countries': ['CA'], 'is_corresponding': False, 'raw_author_name': 'Abdel-rahman Mohamed', 'raw_affiliation_strings': ['University of Toronto, Toronto, On, Canada'], 'affiliations': [{'raw_affiliation_string': 'University of Toronto, Toronto, On, Canada', 'institution_ids': ['https://openalex.org/I185261750']}]}, {'author_position': 'middle', 'author': {'id': 'https://openalex.org/A5047062711', 'display_name': 'George E. Dahl', 'orcid': 'https://orcid.org/0000-0002-0083-844X'}, 'institutions': [{'id': 'https://openalex.org/I185261750', 'display_name': 'University of Toronto', 'ror': 'https://ror.org/03dbr7087', 'country_code': 'CA', 'type': 'education', 'lineage': ['https://openalex.org/I185261750']}], 'countries': ['CA'], 'is_corresponding': False, 'raw_author_name': 'George E. Dahl', 'raw_affiliation_strings': ['University of Toronto, Toronto, On, Canada'], 'affiliations': [{'raw_affiliation_string': 'University of Toronto, Toronto, On, Canada', 'institution_ids': ['https://openalex.org/I185261750']}]}, {'author_position': 'last', 'author': {'id': 'https://openalex.org/A5024209719', 'display_name': 'Geoffrey E. Hinton', 'orcid': 'https://orcid.org/0000-0001-9789-9355'}, 'institutions': [{'id': 'https://openalex.org/I185261750', 'display_name': 'University of Toronto', 'ror': 'https://ror.org/03dbr7087', 'country_code': 'CA', 'type': 'education', 'lineage': ['https://openalex.org/I185261750']}], 'countries': ['CA'], 'is_corresponding': False, 'raw_author_name': 'Geoffrey Hinton', 'raw_affiliation_strings': ['University of Toronto, Toronto, On, Canada'], 'affiliations': [{'raw_affiliation_string': 'University of Toronto, Toronto, On, Canada', 'institution_ids': ['https://openalex.org/I185261750']}]}], 'institution_assertions': [], 'countries_distinct_count': 1, 'institutions_distinct_count': 1, 'corresponding_author_ids': [], 'corresponding_institution_ids': [], 'apc_list': None, 'apc_paid': None, 'fwci': 212.536, 'has_fulltext': True, 'fulltext_origin': 'ngrams', 'cited_by_count': 1693, 'citation_normalized_percentile': {'value': 0.999968, 'is_in_top_1_percent': True, 'is_in_top_10_percent': True}, 'cited_by_percentile_year': {'min': 99, 'max': 100}, 'biblio': {'volume': '20', 'issue': '1', 'first_page': '14', 'last_page': '22'}, 'is_retracted': False, 'is_paratext': False, 'primary_topic': {'id': 'https://openalex.org/T10201', 'display_name': 'Speech Recognition Technology', 'score': 0.9998, 'subfield': {'id': 'https://openalex.org/subfields/1702', 'display_name': 'Artificial Intelligence'}, 'field': {'id': 'https://openalex.org/fields/17', 'display_name': 'Computer Science'}, 'domain': {'id': 'https://openalex.org/domains/3', 'display_name': 'Physical Sciences'}}, 'topics': [{'id': 'https://openalex.org/T10201', 'display_name': 'Speech Recognition Technology', 'score': 0.9998, 'subfield': {'id': 'https://openalex.org/subfields/1702', 'display_name': 'Artificial Intelligence'}, 'field': {'id': 'https://openalex.org/fields/17', 'display_name': 'Computer Science'}, 'domain': {'id': 'https://openalex.org/domains/3', 'display_name': 'Physical Sciences'}}, {'id': 'https://openalex.org/T10860', 'display_name': 'Speech Enhancement Techniques', 'score': 0.9996, 'subfield': {'id': 'https://openalex.org/subfields/1711', 'display_name': 'Signal Processing'}, 'field': {'id': 'https://openalex.org/fields/17', 'display_name': 'Computer Science'}, 'domain': {'id': 'https://openalex.org/domains/3', 'display_name': 'Physical Sciences'}}, {'id': 'https://openalex.org/T11309', 'display_name': 'Audio Signal Classification and Analysis', 'score': 0.9995, 'subfield': {'id': 'https://openalex.org/subfields/1711', 'display_name': 'Signal Processing'}, 'field': {'id': 'https://openalex.org/fields/17', 'display_name': 'Computer Science'}, 'domain': {'id': 'https://openalex.org/domains/3', 'display_name': 'Physical Sciences'}}], 'keywords': [{'id': 'https://openalex.org/keywords/discriminative-model', 'display_name': 'Discriminative model', 'score': 0.86521083}, {'id': 'https://openalex.org/keywords/timit', 'display_name': 'TIMIT', 'score': 0.83992064}, {'id': 'https://openalex.org/keywords/deep-belief-network', 'display_name': 'Deep belief network', 'score': 0.6626528}, {'id': 'https://openalex.org/keywords/acoustic-modeling', 'display_name': 'Acoustic Modeling', 'score': 0.621584}, {'id': 'https://openalex.org/keywords/hidden-markov-models', 'display_name': 'Hidden Markov Models', 'score': 0.618384}, {'id': 'https://openalex.org/keywords/feature', 'display_name': 'Feature (linguistics)', 'score': 0.5523485}, {'id': 'https://openalex.org/keywords/audio-visual-speech-recognition', 'display_name': 'Audio-Visual Speech Recognition', 'score': 0.540186}, {'id': 'https://openalex.org/keywords/audio-event-detection', 'display_name': 'Audio Event Detection', 'score': 0.524283}, {'id': 'https://openalex.org/keywords/statistical-language-modeling', 'display_name': 'Statistical Language Modeling', 'score': 0.522338}, {'id': 'https://openalex.org/keywords/backpropagation', 'display_name': 'Backpropagation', 'score': 0.46950483}, {'id': 'https://openalex.org/keywords/generative-model', 'display_name': 'Generative model', 'score': 0.46759552}], 'concepts': [{'id': 'https://openalex.org/C97931131', 'wikidata': 'https://www.wikidata.org/wiki/Q5282087', 'display_name': 'Discriminative model', 'level': 2, 'score': 0.86521083}, {'id': 'https://openalex.org/C23224414', 'wikidata': 'https://www.wikidata.org/wiki/Q176769', 'display_name': 'Hidden Markov model', 'level': 2, 'score': 0.85212517}, {'id': 'https://openalex.org/C2778724510', 'wikidata': 'https://www.wikidata.org/wiki/Q7670405', 'display_name': 'TIMIT', 'level': 3, 'score': 0.83992064}, {'id': 'https://openalex.org/C41008148', 'wikidata': 'https://www.wikidata.org/wiki/Q21198', 'display_name': 'Computer science', 'level': 0, 'score': 0.71136355}, {'id': 'https://openalex.org/C97385483', 'wikidata': 'https://www.wikidata.org/wiki/Q16954980', 'display_name': 'Deep belief network', 'level': 3, 'score': 0.6626528}, {'id': 'https://openalex.org/C153180895', 'wikidata': 'https://www.wikidata.org/wiki/Q7148389', 'display_name': 'Pattern recognition (psychology)', 'level': 2, 'score': 0.6571331}, {'id': 'https://openalex.org/C154945302', 'wikidata': 'https://www.wikidata.org/wiki/Q11660', 'display_name': 'Artificial intelligence', 'level': 1, 'score': 0.6537189}, {'id': 'https://openalex.org/C61224824', 'wikidata': 'https://www.wikidata.org/wiki/Q2260434', 'display_name': 'Mixture model', 'level': 2, 'score': 0.6330036}, {'id': 'https://openalex.org/C2776401178', 'wikidata': 'https://www.wikidata.org/wiki/Q12050496', 'display_name': 'Feature (linguistics)', 'level': 2, 'score': 0.5523485}, {'id': 'https://openalex.org/C50644808', 'wikidata': 'https://www.wikidata.org/wiki/Q192776', 'display_name': 'Artificial neural network', 'level': 2, 'score': 0.54330534}, {'id': 'https://openalex.org/C163716315', 'wikidata': 'https://www.wikidata.org/wiki/Q901177', 'display_name': 'Gaussian', 'level': 2, 'score': 0.48820573}, {'id': 'https://openalex.org/C39890363', 'wikidata': 'https://www.wikidata.org/wiki/Q36108', 'display_name': 'Generative grammar', 'level': 2, 'score': 0.48044845}, {'id': 'https://openalex.org/C28490314', 'wikidata': 'https://www.wikidata.org/wiki/Q189436', 'display_name': 'Speech recognition', 'level': 1, 'score': 0.4724731}, {'id': 'https://openalex.org/C155032097', 'wikidata': 'https://www.wikidata.org/wiki/Q798503', 'display_name': 'Backpropagation', 'level': 3, 'score': 0.46950483}, {'id': 'https://openalex.org/C167966045', 'wikidata': 'https://www.wikidata.org/wiki/Q5532625', 'display_name': 'Generative model', 'level': 3, 'score': 0.46759552}, {'id': 'https://openalex.org/C163836022', 'wikidata': 'https://www.wikidata.org/wiki/Q6771326', 'display_name': 'Markov model', 'level': 3, 'score': 0.45447785}, {'id': 'https://openalex.org/C119857082', 'wikidata': 'https://www.wikidata.org/wiki/Q2539', 'display_name': 'Machine learning', 'level': 1, 'score': 0.35208237}, {'id': 'https://openalex.org/C98763669', 'wikidata': 'https://www.wikidata.org/wiki/Q176645', 'display_name': 'Markov chain', 'level': 2, 'score': 0.335901}, {'id': 'https://openalex.org/C41895202', 'wikidata': 'https://www.wikidata.org/wiki/Q8162', 'display_name': 'Linguistics', 'level': 1, 'score': 0.0}, {'id': 'https://openalex.org/C138885662', 'wikidata': 'https://www.wikidata.org/wiki/Q5891', 'display_name': 'Philosophy', 'level': 0, 'score': 0.0}, {'id': 'https://openalex.org/C121332964', 'wikidata': 'https://www.wikidata.org/wiki/Q413', 'display_name': 'Physics', 'level': 0, 'score': 0.0}, {'id': 'https://openalex.org/C62520636', 'wikidata': 'https://www.wikidata.org/wiki/Q944', 'display_name': 'Quantum mechanics', 'level': 1, 'score': 0.0}], 'mesh': [], 'locations_count': 1, 'locations': [{'is_oa': False, 'landing_page_url': 'https://doi.org/10.1109/tasl.2011.2109382', 'pdf_url': None, 'source': {'id': 'https://openalex.org/S199497470', 'display_name': 'IEEE Transactions on Audio Speech and Language Processing', 'issn_l': '1558-7916', 'issn': ['1558-7916', '1558-7924'], 'is_oa': False, 'is_in_doaj': False, 'is_core': True, 'host_organization': 'https://openalex.org/P4310319808', 'host_organization_name': 'Institute of Electrical and Electronics Engineers', 'host_organization_lineage': ['https://openalex.org/P4310319808'], 'host_organization_lineage_names': ['Institute of Electrical and Electronics Engineers'], 'type': 'journal'}, 'license': None, 'license_id': None, 'version': None, 'is_accepted': False, 'is_published': False}], 'best_oa_location': None, 'sustainable_development_goals': [{'id': 'https://metadata.un.org/sdg/10', 'score': 0.77, 'display_name': 'Reduced inequalities'}], 'grants': [], 'datasets': [], 'versions': [], 'referenced_works_count': 42, 'referenced_works': ['https://openalex.org/W137106866', 'https://openalex.org/W1538748823', 'https://openalex.org/W1553004968', 'https://openalex.org/W155708904', 'https://openalex.org/W1562289873', 'https://openalex.org/W1647054946', 'https://openalex.org/W190289757', 'https://openalex.org/W1993845689', 'https://openalex.org/W1994197834', 'https://openalex.org/W2014641584', 'https://openalex.org/W2027915610', 'https://openalex.org/W2071489795', 'https://openalex.org/W2077804127', 'https://openalex.org/W2083380015', 'https://openalex.org/W2100495367', 'https://openalex.org/W2102512139', 'https://openalex.org/W2103359087', 'https://openalex.org/W2104997912', 'https://openalex.org/W2110871230', 'https://openalex.org/W2116064496', 'https://openalex.org/W2117366282', 'https://openalex.org/W2125534887', 'https://openalex.org/W2131700150', 'https://openalex.org/W2131854866', 'https://openalex.org/W2136922672', 'https://openalex.org/W2139671364', 'https://openalex.org/W2141778357', 'https://openalex.org/W2142773971', 'https://openalex.org/W2147768505', 'https://openalex.org/W2150529939', 'https://openalex.org/W2155342818', 'https://openalex.org/W2159080219', 'https://openalex.org/W2161893161', 'https://openalex.org/W2165119652', 'https://openalex.org/W2766545043', 'https://openalex.org/W2913932916', 'https://openalex.org/W2990138404', 'https://openalex.org/W3118608800', 'https://openalex.org/W44815768', 'https://openalex.org/W66838807', 'https://openalex.org/W73112891', 'https://openalex.org/W99063960'], 'related_works': ['https://openalex.org/W643545633', 'https://openalex.org/W3160080723', 'https://openalex.org/W2887732792', 'https://openalex.org/W2474633151', 'https://openalex.org/W2172097686', 'https://openalex.org/W2105153012', 'https://openalex.org/W2085928016', 'https://openalex.org/W2052112670', 'https://openalex.org/W2048014685', 'https://openalex.org/W2002052740'], 'abstract_inverted_index': {'Gaussian': [0, 35], 'mixture': [1, 36], 'models': [2, 16, 37], 'are': [3, 57], 'currently': [4], 'the': [5, 10, 27, 80, 85, 95, 108], 'dominant': [6], 'technique': [7], 'for': [8, 17], 'modeling': [9], 'emission': [11], 'distribution': [12, 106], 'of': [13, 46, 53, 65, 68, 75, 110], 'hidden': [14, 112], 'Markov': [15, 113], 'speech': [18], 'recognition.': [19], 'We': [20], 'show': [21], 'that': [22, 42], 'better': [23, 101], 'phone': [24], 'recognition': [25], 'on': [26], 'TIMIT': [28], 'dataset': [29], 'can': [30], 'be': [31], 'achieved': [32], 'by': [33, 38], 'replacing': [34], 'deep': [39], 'neural': [40], 'networks': [41, 56], 'contain': [43], 'many': [44], 'layers': [45], 'features': [47, 96], 'and': [48], 'a': [49, 61, 66, 104], 'very': [50], 'large': [51], 'number': [52], 'parameters.': [54], 'These': [55], 'first': [58], 'pre-trained': [59], 'as': [60], 'multi-layer': [62], 'generative': [63, 81], 'model': [64], 'window': [67], 'spectral': [69], 'feature': [70], 'vectors': [71], 'without': [72], 'making': [73], 'use': [74], 'any': [76], 'discriminative': [77, 89], 'information.': [78], 'Once': [79], 'pre-training': [82], 'has': [83], 'designed': [84], 'features,': [86], 'we': [87], 'perform': [88], 'fine-tuning': [90], 'using': [91], 'backpropagation': [92], 'to': [93, 98], 'adjust': [94], 'slightly': [97], 'make': [99], 'them': [100], 'at': [102], 'predicting': [103], 'probability': [105], 'over': [107], 'states': [109], 'monophone': [111], 'models.': [114]}, 'cited_by_api_url': 'https://api.openalex.org/works?filter=cites:W1993882792', 'counts_by_year': [{'year': 2024, 'cited_by_count': 30}, {'year': 2023, 'cited_by_count': 71}, {'year': 2022, 'cited_by_count': 92}, {'year': 2021, 'cited_by_count': 116}, {'year': 2020, 'cited_by_count': 148}, {'year': 2019, 'cited_by_count': 164}, {'year': 2018, 'cited_by_count': 196}, {'year': 2017, 'cited_by_count': 156}, {'year': 2016, 'cited_by_count': 151}, {'year': 2015, 'cited_by_count': 196}, {'year': 2014, 'cited_by_count': 174}, {'year': 2013, 'cited_by_count': 125}, {'year': 2012, 'cited_by_count': 51}], 'updated_date': '2024-09-18T16:08:52.000856', 'created_date': '2016-06-24'}
Publication Information

Basic Information

Access and Citation

AI Researcher Chatbot

Primary Location

Authors

Topics

Keywords

Related Works