Speech recognition with deep recurrent neural networks

Name: Work Video:
Duration: 3 min 30 s
Alex Graves; Abdelrahman Mohamed; Geoffrey E. Hinton
{'id': 'https://openalex.org/W2143612262', 'doi': 'https://doi.org/10.1109/icassp.2013.6638947', 'title': 'Speech recognition with deep recurrent neural networks', 'display_name': 'Speech recognition with deep recurrent neural networks', 'publication_year': 2013, 'publication_date': '2013-05-01', 'ids': {'openalex': 'https://openalex.org/W2143612262', 'doi': 'https://doi.org/10.1109/icassp.2013.6638947', 'mag': '2143612262'}, 'language': 'en', 'primary_location': {'is_oa': False, 'landing_page_url': 'https://doi.org/10.1109/icassp.2013.6638947', 'pdf_url': None, 'source': {'id': 'https://openalex.org/S4363607879', 'display_name': 'IEEE International Conference on Acoustics Speech and Signal Processing', 'issn_l': None, 'issn': None, 'is_oa': False, 'is_in_doaj': False, 'is_core': False, 'host_organization': None, 'host_organization_name': None, 'host_organization_lineage': [], 'host_organization_lineage_names': [], 'type': 'conference'}, 'license': None, 'license_id': None, 'version': None, 'is_accepted': False, 'is_published': False}, 'type': 'article', 'type_crossref': 'proceedings-article', 'indexed_in': ['crossref'], 'open_access': {'is_oa': True, 'oa_status': 'green', 'oa_url': 'https://arxiv.org/pdf/1303.5778', 'any_repository_has_fulltext': True}, 'authorships': [{'author_position': 'first', 'author': {'id': 'https://openalex.org/A5043473089', 'display_name': 'Alex Graves', 'orcid': None}, 'institutions': [{'id': 'https://openalex.org/I185261750', 'display_name': 'University of Toronto', 'ror': 'https://ror.org/03dbr7087', 'country_code': 'CA', 'type': 'education', 'lineage': ['https://openalex.org/I185261750']}], 'countries': ['CA'], 'is_corresponding': False, 'raw_author_name': 'Alex Graves', 'raw_affiliation_strings': ['DEPARTMENT OF COMPUTER SCIENCE, UNIVERSITY OF TORONTO, TORONTO, ON, CANADA'], 'affiliations': [{'raw_affiliation_string': 'DEPARTMENT OF COMPUTER SCIENCE, UNIVERSITY OF TORONTO, TORONTO, ON, CANADA', 'institution_ids': ['https://openalex.org/I185261750']}]}, {'author_position': 'middle', 'author': {'id': 'https://openalex.org/A5103742478', 'display_name': 'Abdelrahman Mohamed', 'orcid': None}, 'institutions': [{'id': 'https://openalex.org/I185261750', 'display_name': 'University of Toronto', 'ror': 'https://ror.org/03dbr7087', 'country_code': 'CA', 'type': 'education', 'lineage': ['https://openalex.org/I185261750']}], 'countries': ['CA'], 'is_corresponding': False, 'raw_author_name': 'Abdel-rahman Mohamed', 'raw_affiliation_strings': ['DEPARTMENT OF COMPUTER SCIENCE, UNIVERSITY OF TORONTO, TORONTO, ON, CANADA'], 'affiliations': [{'raw_affiliation_string': 'DEPARTMENT OF COMPUTER SCIENCE, UNIVERSITY OF TORONTO, TORONTO, ON, CANADA', 'institution_ids': ['https://openalex.org/I185261750']}]}, {'author_position': 'last', 'author': {'id': 'https://openalex.org/A5108093963', 'display_name': 'Geoffrey E. Hinton', 'orcid': None}, 'institutions': [{'id': 'https://openalex.org/I185261750', 'display_name': 'University of Toronto', 'ror': 'https://ror.org/03dbr7087', 'country_code': 'CA', 'type': 'education', 'lineage': ['https://openalex.org/I185261750']}], 'countries': ['CA'], 'is_corresponding': False, 'raw_author_name': 'Geoffrey Hinton', 'raw_affiliation_strings': ['DEPARTMENT OF COMPUTER SCIENCE, UNIVERSITY OF TORONTO, TORONTO, ON, CANADA'], 'affiliations': [{'raw_affiliation_string': 'DEPARTMENT OF COMPUTER SCIENCE, UNIVERSITY OF TORONTO, TORONTO, ON, CANADA', 'institution_ids': ['https://openalex.org/I185261750']}]}], 'institution_assertions': [], 'countries_distinct_count': 1, 'institutions_distinct_count': 1, 'corresponding_author_ids': [], 'corresponding_institution_ids': [], 'apc_list': None, 'apc_paid': None, 'fwci': 138.359, 'has_fulltext': True, 'fulltext_origin': 'ngrams', 'cited_by_count': 8234, 'citation_normalized_percentile': {'value': 0.999871, 'is_in_top_1_percent': True, 'is_in_top_10_percent': True}, 'cited_by_percentile_year': {'min': 99, 'max': 100}, 'biblio': {'volume': None, 'issue': None, 'first_page': '6645', 'last_page': '6649'}, 'is_retracted': False, 'is_paratext': False, 'primary_topic': {'id': 'https://openalex.org/T10201', 'display_name': 'Speech Recognition and Synthesis', 'score': 0.9999, 'subfield': {'id': 'https://openalex.org/subfields/1702', 'display_name': 'Artificial Intelligence'}, 'field': {'id': 'https://openalex.org/fields/17', 'display_name': 'Computer Science'}, 'domain': {'id': 'https://openalex.org/domains/3', 'display_name': 'Physical Sciences'}}, 'topics': [{'id': 'https://openalex.org/T10201', 'display_name': 'Speech Recognition and Synthesis', 'score': 0.9999, 'subfield': {'id': 'https://openalex.org/subfields/1702', 'display_name': 'Artificial Intelligence'}, 'field': {'id': 'https://openalex.org/fields/17', 'display_name': 'Computer Science'}, 'domain': {'id': 'https://openalex.org/domains/3', 'display_name': 'Physical Sciences'}}, {'id': 'https://openalex.org/T11309', 'display_name': 'Music and Audio Processing', 'score': 0.9998, 'subfield': {'id': 'https://openalex.org/subfields/1711', 'display_name': 'Signal Processing'}, 'field': {'id': 'https://openalex.org/fields/17', 'display_name': 'Computer Science'}, 'domain': {'id': 'https://openalex.org/domains/3', 'display_name': 'Physical Sciences'}}, {'id': 'https://openalex.org/T10181', 'display_name': 'Natural Language Processing Techniques', 'score': 0.9992, 'subfield': {'id': 'https://openalex.org/subfields/1702', 'display_name': 'Artificial Intelligence'}, 'field': {'id': 'https://openalex.org/fields/17', 'display_name': 'Computer Science'}, 'domain': {'id': 'https://openalex.org/domains/3', 'display_name': 'Physical Sciences'}}], 'keywords': [{'id': 'https://openalex.org/keywords/connectionism', 'display_name': 'Connectionism', 'score': 0.67904735}, {'id': 'https://openalex.org/keywords/timit', 'display_name': 'TIMIT', 'score': 0.6582495}, {'id': 'https://openalex.org/keywords/benchmark', 'display_name': 'Benchmark (surveying)', 'score': 0.51187056}], 'concepts': [{'id': 'https://openalex.org/C147168706', 'wikidata': 'https://www.wikidata.org/wiki/Q1457734', 'display_name': 'Recurrent neural network', 'level': 3, 'score': 0.92652893}, {'id': 'https://openalex.org/C41008148', 'wikidata': 'https://www.wikidata.org/wiki/Q21198', 'display_name': 'Computer science', 'level': 0, 'score': 0.82395864}, {'id': 'https://openalex.org/C8521452', 'wikidata': 'https://www.wikidata.org/wiki/Q203790', 'display_name': 'Connectionism', 'level': 3, 'score': 0.67904735}, {'id': 'https://openalex.org/C2778724510', 'wikidata': 'https://www.wikidata.org/wiki/Q7670405', 'display_name': 'TIMIT', 'level': 3, 'score': 0.6582495}, {'id': 'https://openalex.org/C28490314', 'wikidata': 'https://www.wikidata.org/wiki/Q189436', 'display_name': 'Speech recognition', 'level': 1, 'score': 0.6298375}, {'id': 'https://openalex.org/C154945302', 'wikidata': 'https://www.wikidata.org/wiki/Q11660', 'display_name': 'Artificial intelligence', 'level': 1, 'score': 0.59778374}, {'id': 'https://openalex.org/C108583219', 'wikidata': 'https://www.wikidata.org/wiki/Q197536', 'display_name': 'Deep learning', 'level': 2, 'score': 0.5802343}, {'id': 'https://openalex.org/C2779343474', 'wikidata': 'https://www.wikidata.org/wiki/Q3109175', 'display_name': 'Context (archaeology)', 'level': 2, 'score': 0.5232453}, {'id': 'https://openalex.org/C185798385', 'wikidata': 'https://www.wikidata.org/wiki/Q1161707', 'display_name': 'Benchmark (surveying)', 'level': 2, 'score': 0.51187056}, {'id': 'https://openalex.org/C175202392', 'wikidata': 'https://www.wikidata.org/wiki/Q2434543', 'display_name': 'Time delay neural network', 'level': 3, 'score': 0.46035168}, {'id': 'https://openalex.org/C50644808', 'wikidata': 'https://www.wikidata.org/wiki/Q192776', 'display_name': 'Artificial neural network', 'level': 2, 'score': 0.4267023}, {'id': 'https://openalex.org/C23224414', 'wikidata': 'https://www.wikidata.org/wiki/Q176769', 'display_name': 'Hidden Markov model', 'level': 2, 'score': 0.34714407}, {'id': 'https://openalex.org/C153180895', 'wikidata': 'https://www.wikidata.org/wiki/Q7148389', 'display_name': 'Pattern recognition (psychology)', 'level': 2, 'score': 0.32677874}, {'id': 'https://openalex.org/C151730666', 'wikidata': 'https://www.wikidata.org/wiki/Q7205', 'display_name': 'Paleontology', 'level': 1, 'score': 0.0}, {'id': 'https://openalex.org/C13280743', 'wikidata': 'https://www.wikidata.org/wiki/Q131089', 'display_name': 'Geodesy', 'level': 1, 'score': 0.0}, {'id': 'https://openalex.org/C86803240', 'wikidata': 'https://www.wikidata.org/wiki/Q420', 'display_name': 'Biology', 'level': 0, 'score': 0.0}, {'id': 'https://openalex.org/C205649164', 'wikidata': 'https://www.wikidata.org/wiki/Q1071', 'display_name': 'Geography', 'level': 0, 'score': 0.0}], 'mesh': [], 'locations_count': 2, 'locations': [{'is_oa': False, 'landing_page_url': 'https://doi.org/10.1109/icassp.2013.6638947', 'pdf_url': None, 'source': {'id': 'https://openalex.org/S4363607879', 'display_name': 'IEEE International Conference on Acoustics Speech and Signal Processing', 'issn_l': None, 'issn': None, 'is_oa': False, 'is_in_doaj': False, 'is_core': False, 'host_organization': None, 'host_organization_name': None, 'host_organization_lineage': [], 'host_organization_lineage_names': [], 'type': 'conference'}, 'license': None, 'license_id': None, 'version': None, 'is_accepted': False, 'is_published': False}, {'is_oa': True, 'landing_page_url': 'https://arxiv.org/abs/1303.5778', 'pdf_url': 'https://arxiv.org/pdf/1303.5778', 'source': {'id': 'https://openalex.org/S4306400194', 'display_name': 'arXiv (Cornell University)', 'issn_l': None, 'issn': None, 'is_oa': True, 'is_in_doaj': False, 'is_core': False, 'host_organization': 'https://openalex.org/I205783295', 'host_organization_name': 'Cornell University', 'host_organization_lineage': ['https://openalex.org/I205783295'], 'host_organization_lineage_names': ['Cornell University'], 'type': 'repository'}, 'license': None, 'license_id': None, 'version': 'submittedVersion', 'is_accepted': False, 'is_published': False}], 'best_oa_location': {'is_oa': True, 'landing_page_url': 'https://arxiv.org/abs/1303.5778', 'pdf_url': 'https://arxiv.org/pdf/1303.5778', 'source': {'id': 'https://openalex.org/S4306400194', 'display_name': 'arXiv (Cornell University)', 'issn_l': None, 'issn': None, 'is_oa': True, 'is_in_doaj': False, 'is_core': False, 'host_organization': 'https://openalex.org/I205783295', 'host_organization_name': 'Cornell University', 'host_organization_lineage': ['https://openalex.org/I205783295'], 'host_organization_lineage_names': ['Cornell University'], 'type': 'repository'}, 'license': None, 'license_id': None, 'version': 'submittedVersion', 'is_accepted': False, 'is_published': False}, 'sustainable_development_goals': [{'id': 'https://metadata.un.org/sdg/4', 'score': 0.78, 'display_name': 'Quality education'}], 'grants': [], 'datasets': [], 'versions': [], 'referenced_works_count': 27, 'referenced_works': ['https://openalex.org/W1553004968', 'https://openalex.org/W1828163288', 'https://openalex.org/W1860566644', 'https://openalex.org/W1964175594', 'https://openalex.org/W1993882792', 'https://openalex.org/W2047229728', 'https://openalex.org/W2064675550', 'https://openalex.org/W2077804127', 'https://openalex.org/W2079735306', 'https://openalex.org/W2101154665', 'https://openalex.org/W2108677974', 'https://openalex.org/W2110871230', 'https://openalex.org/W2115730999', 'https://openalex.org/W2117141689', 'https://openalex.org/W2127141656', 'https://openalex.org/W2131774270', 'https://openalex.org/W2136727248', 'https://openalex.org/W2141778357', 'https://openalex.org/W2144499799', 'https://openalex.org/W2155273149', 'https://openalex.org/W2160815625', 'https://openalex.org/W2167898728', 'https://openalex.org/W2170942820', 'https://openalex.org/W2184045248', 'https://openalex.org/W2290318471', 'https://openalex.org/W4285719527', 'https://openalex.org/W811578723'], 'related_works': ['https://openalex.org/W80018097', 'https://openalex.org/W3134920593', 'https://openalex.org/W2501000458', 'https://openalex.org/W2340308015', 'https://openalex.org/W2146842779', 'https://openalex.org/W2143247386', 'https://openalex.org/W2109916967', 'https://openalex.org/W2100729928', 'https://openalex.org/W1990589093', 'https://openalex.org/W1578749070'], 'abstract_inverted_index': {'Recurrent': [0], 'neural': [1, 82], 'networks': [2, 98], '(RNNs)': [3], 'are': [4], 'a': [5, 125], 'powerful': [6], 'model': [7], 'for': [8, 25], 'sequential': [9], 'data.': [10], 'End-to-end': [11], 'training': [12], 'methods': [13, 39], 'such': [14], 'as': [15], 'Connectionist': [16], 'Temporal': [17], 'Classification': [18], 'make': [19], 'it': [20], 'possible': [21], 'to': [22, 138], 'train': [23], 'RNNs': [24, 123], 'sequence': [26], 'labelling': [27], 'problems': [28], 'where': [29], 'the': [30, 41, 86, 100, 132, 142], 'input-output': [31], 'alignment': [32], 'is': [33, 141], 'unknown.': [34], 'The': [35], 'combination': [36], 'of': [37, 89, 103, 129], 'these': [38], 'with': [40, 69, 99, 113], 'Long': [42, 120], 'Short-term': [43, 121], 'Memory': [44, 122], 'RNN': [45, 59], 'architecture': [46], 'has': [47, 64], 'proved': [48, 93], 'particularly': [49], 'fruitful,': [50], 'delivering': [51], 'state-of-the-art': [52], 'results': [53, 71], 'in': [54, 61, 96], 'cursive': [55], 'handwriting': [56], 'recognition.': [57], 'However': [58], 'performance': [60], 'speech': [62], 'recognition': [63, 135], 'so': [65, 94], 'far': [66], 'been': [67], 'disappointing,': [68], 'better': [70], 'returned': [72], 'by': [73], 'deep': [74, 80, 97, 119], 'feedforward': [75], 'networks.': [76], 'This': [77], 'paper': [78], 'investigates': [79], 'recurrent': [81], 'networks,': [83], 'which': [84, 137], 'combine': [85], 'multiple': [87], 'levels': [88], 'representation': [90], 'that': [91, 107, 118], 'have': [92], 'effective': [95], 'flexible': [101], 'use': [102], 'long': [104], 'range': [105], 'context': [106], 'empowers': [108], 'RNNs.': [109], 'When': [110], 'trained': [111], 'end-to-end': [112], 'suitable': [114], 'regularisation,': [115], 'we': [116], 'find': [117], 'achieve': [124], 'test': [126], 'set': [127], 'error': [128], '17.7%': [130], 'on': [131], 'TIMIT': [133], 'phoneme': [134], 'benchmark,': [136], 'our': [139], 'knowledge': [140], 'best': [143], 'recorded': [144], 'score.': [145]}, 'cited_by_api_url': 'https://api.openalex.org/works?filter=cites:W2143612262', 'counts_by_year': [{'year': 2024, 'cited_by_count': 362}, {'year': 2023, 'cited_by_count': 608}, {'year': 2022, 'cited_by_count': 704}, {'year': 2021, 'cited_by_count': 1160}, {'year': 2020, 'cited_by_count': 1300}, {'year': 2019, 'cited_by_count': 1386}, {'year': 2018, 'cited_by_count': 1055}, {'year': 2017, 'cited_by_count': 688}, {'year': 2016, 'cited_by_count': 503}, {'year': 2015, 'cited_by_count': 249}, {'year': 2014, 'cited_by_count': 96}, {'year': 2013, 'cited_by_count': 14}, {'year': 2012, 'cited_by_count': 2}], 'updated_date': '2024-12-16T03:02:12.227912', 'created_date': '2016-06-24'}
Publication Information

Basic Information

Access and Citation

AI Researcher Chatbot

Primary Location

Authors

Topics

Keywords

Related Works