Learning Without State-Estimation in Partially Observable Markovian Decision Processes

Satinder Singh; Tommi Jaakkola; Michael I. Jordan
{'id': 'https://openalex.org/W1541084404', 'doi': 'https://doi.org/10.1016/b978-1-55860-335-6.50042-8', 'title': 'Learning Without State-Estimation in Partially Observable Markovian Decision Processes', 'display_name': 'Learning Without State-Estimation in Partially Observable Markovian Decision Processes', 'publication_year': 1994, 'publication_date': '1994-01-01', 'ids': {'openalex': 'https://openalex.org/W1541084404', 'doi': 'https://doi.org/10.1016/b978-1-55860-335-6.50042-8', 'mag': '1541084404'}, 'language': 'en', 'primary_location': {'is_oa': False, 'landing_page_url': 'https://doi.org/10.1016/b978-1-55860-335-6.50042-8', 'pdf_url': None, 'source': {'id': 'https://openalex.org/S4306463230', 'display_name': 'Elsevier eBooks', 'issn_l': None, 'issn': None, 'is_oa': False, 'is_in_doaj': False, 'is_core': False, 'host_organization': 'https://openalex.org/P4310320990', 'host_organization_name': 'Elsevier BV', 'host_organization_lineage': ['https://openalex.org/P4310320990'], 'host_organization_lineage_names': ['Elsevier BV'], 'type': 'ebook platform'}, 'license': None, 'license_id': None, 'version': None, 'is_accepted': False, 'is_published': False}, 'type': 'book-chapter', 'type_crossref': 'book-chapter', 'indexed_in': ['crossref'], 'open_access': {'is_oa': False, 'oa_status': 'closed', 'oa_url': None, 'any_repository_has_fulltext': False}, 'authorships': [{'author_position': 'first', 'author': {'id': 'https://openalex.org/A5065366930', 'display_name': 'Satinder Singh', 'orcid': 'https://orcid.org/0000-0002-2736-7641'}, 'institutions': [{'id': 'https://openalex.org/I63966007', 'display_name': 'Massachusetts Institute of Technology', 'ror': 'https://ror.org/042nb2s44', 'country_code': 'US', 'type': 'education', 'lineage': ['https://openalex.org/I63966007']}], 'countries': ['US'], 'is_corresponding': False, 'raw_author_name': 'Satinder P. Singh', 'raw_affiliation_strings': ['Department of Brain and Cognitive Sciences (E10), Massachusetts Institute of Technology, Cambridge, MA 02139'], 'affiliations': [{'raw_affiliation_string': 'Department of Brain and Cognitive Sciences (E10), Massachusetts Institute of Technology, Cambridge, MA 02139', 'institution_ids': ['https://openalex.org/I63966007']}]}, {'author_position': 'middle', 'author': {'id': 'https://openalex.org/A5048915657', 'display_name': 'Tommi Jaakkola', 'orcid': 'https://orcid.org/0000-0002-2199-0379'}, 'institutions': [{'id': 'https://openalex.org/I63966007', 'display_name': 'Massachusetts Institute of Technology', 'ror': 'https://ror.org/042nb2s44', 'country_code': 'US', 'type': 'education', 'lineage': ['https://openalex.org/I63966007']}], 'countries': ['US'], 'is_corresponding': False, 'raw_author_name': 'Tommi S. Jaakkola', 'raw_affiliation_strings': ['Department of Brain and Cognitive Sciences (E10), Massachusetts Institute of Technology, Cambridge, MA 02139'], 'affiliations': [{'raw_affiliation_string': 'Department of Brain and Cognitive Sciences (E10), Massachusetts Institute of Technology, Cambridge, MA 02139', 'institution_ids': ['https://openalex.org/I63966007']}]}, {'author_position': 'last', 'author': {'id': 'https://openalex.org/A5049812527', 'display_name': 'Michael I. Jordan', 'orcid': 'https://orcid.org/0000-0001-8935-817X'}, 'institutions': [], 'countries': [], 'is_corresponding': False, 'raw_author_name': 'Michael I. Jordan', 'raw_affiliation_strings': [], 'affiliations': []}], 'institution_assertions': [], 'countries_distinct_count': 1, 'institutions_distinct_count': 1, 'corresponding_author_ids': [], 'corresponding_institution_ids': [], 'apc_list': None, 'apc_paid': None, 'fwci': 3.695, 'has_fulltext': True, 'fulltext_origin': 'ngrams', 'cited_by_count': 331, 'citation_normalized_percentile': {'value': 0.992303, 'is_in_top_1_percent': True, 'is_in_top_10_percent': True}, 'cited_by_percentile_year': {'min': 98, 'max': 99}, 'biblio': {'volume': None, 'issue': None, 'first_page': '284', 'last_page': '292'}, 'is_retracted': False, 'is_paratext': False, 'primary_topic': {'id': 'https://openalex.org/T10462', 'display_name': 'Reinforcement Learning Algorithms', 'score': 0.9983, 'subfield': {'id': 'https://openalex.org/subfields/1702', 'display_name': 'Artificial Intelligence'}, 'field': {'id': 'https://openalex.org/fields/17', 'display_name': 'Computer Science'}, 'domain': {'id': 'https://openalex.org/domains/3', 'display_name': 'Physical Sciences'}}, 'topics': [{'id': 'https://openalex.org/T10462', 'display_name': 'Reinforcement Learning Algorithms', 'score': 0.9983, 'subfield': {'id': 'https://openalex.org/subfields/1702', 'display_name': 'Artificial Intelligence'}, 'field': {'id': 'https://openalex.org/fields/17', 'display_name': 'Computer Science'}, 'domain': {'id': 'https://openalex.org/domains/3', 'display_name': 'Physical Sciences'}}, {'id': 'https://openalex.org/T12072', 'display_name': 'Active Learning in Machine Learning Research', 'score': 0.9856, 'subfield': {'id': 'https://openalex.org/subfields/1702', 'display_name': 'Artificial Intelligence'}, 'field': {'id': 'https://openalex.org/fields/17', 'display_name': 'Computer Science'}, 'domain': {'id': 'https://openalex.org/domains/3', 'display_name': 'Physical Sciences'}}, {'id': 'https://openalex.org/T12101', 'display_name': 'Optimization of Multi-Armed Bandit Problems', 'score': 0.9771, 'subfield': {'id': 'https://openalex.org/subfields/1803', 'display_name': 'Management Science and Operations Research'}, 'field': {'id': 'https://openalex.org/fields/18', 'display_name': 'Decision Sciences'}, 'domain': {'id': 'https://openalex.org/domains/2', 'display_name': 'Social Sciences'}}], 'keywords': [{'id': 'https://openalex.org/keywords/semi-supervised-learning', 'display_name': 'Semi-Supervised Learning', 'score': 0.560002}, {'id': 'https://openalex.org/keywords/reinforcement-learning', 'display_name': 'Reinforcement Learning', 'score': 0.553264}], 'concepts': [{'id': 'https://openalex.org/C97541855', 'wikidata': 'https://www.wikidata.org/wiki/Q830687', 'display_name': 'Reinforcement learning', 'level': 2, 'score': 0.8097526}, {'id': 'https://openalex.org/C106189395', 'wikidata': 'https://www.wikidata.org/wiki/Q176789', 'display_name': 'Markov decision process', 'level': 3, 'score': 0.8085779}, {'id': 'https://openalex.org/C32848918', 'wikidata': 'https://www.wikidata.org/wiki/Q845789', 'display_name': 'Observable', 'level': 2, 'score': 0.768059}, {'id': 'https://openalex.org/C41008148', 'wikidata': 'https://www.wikidata.org/wiki/Q21198', 'display_name': 'Computer science', 'level': 0, 'score': 0.69361603}, {'id': 'https://openalex.org/C17098449', 'wikidata': 'https://www.wikidata.org/wiki/Q176814', 'display_name': 'Partially observable Markov decision process', 'level': 4, 'score': 0.5575964}, {'id': 'https://openalex.org/C48103436', 'wikidata': 'https://www.wikidata.org/wiki/Q599031', 'display_name': 'State (computer science)', 'level': 2, 'score': 0.5464857}, {'id': 'https://openalex.org/C159886148', 'wikidata': 'https://www.wikidata.org/wiki/Q176645', 'display_name': 'Markov process', 'level': 2, 'score': 0.5404476}, {'id': 'https://openalex.org/C72434380', 'wikidata': 'https://www.wikidata.org/wiki/Q230930', 'display_name': 'State space', 'level': 2, 'score': 0.5386408}, {'id': 'https://openalex.org/C126255220', 'wikidata': 'https://www.wikidata.org/wiki/Q141495', 'display_name': 'Mathematical optimization', 'level': 1, 'score': 0.4922079}, {'id': 'https://openalex.org/C154945302', 'wikidata': 'https://www.wikidata.org/wiki/Q11660', 'display_name': 'Artificial intelligence', 'level': 1, 'score': 0.4537691}, {'id': 'https://openalex.org/C2777212361', 'wikidata': 'https://www.wikidata.org/wiki/Q5127848', 'display_name': 'Class (philosophy)', 'level': 2, 'score': 0.41364926}, {'id': 'https://openalex.org/C33923547', 'wikidata': 'https://www.wikidata.org/wiki/Q395', 'display_name': 'Mathematics', 'level': 0, 'score': 0.1850344}, {'id': 'https://openalex.org/C11413529', 'wikidata': 'https://www.wikidata.org/wiki/Q8366', 'display_name': 'Algorithm', 'level': 1, 'score': 0.12794316}, {'id': 'https://openalex.org/C105795698', 'wikidata': 'https://www.wikidata.org/wiki/Q12483', 'display_name': 'Statistics', 'level': 1, 'score': 0.0}, {'id': 'https://openalex.org/C121332964', 'wikidata': 'https://www.wikidata.org/wiki/Q413', 'display_name': 'Physics', 'level': 0, 'score': 0.0}, {'id': 'https://openalex.org/C62520636', 'wikidata': 'https://www.wikidata.org/wiki/Q944', 'display_name': 'Quantum mechanics', 'level': 1, 'score': 0.0}], 'mesh': [], 'locations_count': 1, 'locations': [{'is_oa': False, 'landing_page_url': 'https://doi.org/10.1016/b978-1-55860-335-6.50042-8', 'pdf_url': None, 'source': {'id': 'https://openalex.org/S4306463230', 'display_name': 'Elsevier eBooks', 'issn_l': None, 'issn': None, 'is_oa': False, 'is_in_doaj': False, 'is_core': False, 'host_organization': 'https://openalex.org/P4310320990', 'host_organization_name': 'Elsevier BV', 'host_organization_lineage': ['https://openalex.org/P4310320990'], 'host_organization_lineage_names': ['Elsevier BV'], 'type': 'ebook platform'}, 'license': None, 'license_id': None, 'version': None, 'is_accepted': False, 'is_published': False}], 'best_oa_location': None, 'sustainable_development_goals': [{'id': 'https://metadata.un.org/sdg/16', 'score': 0.78, 'display_name': 'Peace, justice, and strong institutions'}], 'grants': [], 'datasets': [], 'versions': [], 'referenced_works_count': 22, 'referenced_works': ['https://openalex.org/W1491843047', 'https://openalex.org/W1499371387', 'https://openalex.org/W1500024457', 'https://openalex.org/W1545148916', 'https://openalex.org/W1549353711', 'https://openalex.org/W1555801537', 'https://openalex.org/W1557517019', 'https://openalex.org/W1593772383', 'https://openalex.org/W1657674574', 'https://openalex.org/W2021801581', 'https://openalex.org/W2044375425', 'https://openalex.org/W2061361125', 'https://openalex.org/W2064018461', 'https://openalex.org/W2091565802', 'https://openalex.org/W2100677568', 'https://openalex.org/W2101242010', 'https://openalex.org/W2126217565', 'https://openalex.org/W2135630072', 'https://openalex.org/W2158091072', 'https://openalex.org/W3041202696', 'https://openalex.org/W4240813273', 'https://openalex.org/W4298856952'], 'related_works': ['https://openalex.org/W52153049', 'https://openalex.org/W4323315247', 'https://openalex.org/W2999848267', 'https://openalex.org/W2951545791', 'https://openalex.org/W2294884454', 'https://openalex.org/W2096013579', 'https://openalex.org/W1760611253', 'https://openalex.org/W1589140671', 'https://openalex.org/W1515117609', 'https://openalex.org/W131709709'], 'abstract_inverted_index': {'Reinforcement': [0], 'learning': [1, 11, 64, 96, 101, 148], '(RL)': [2], 'algorithms': [3], 'provide': [4], 'a': [5, 76, 144, 169], 'sound': [6], 'theoretical': [7], 'basis': [8], 'for': [9, 14, 32, 147], 'building': [10], 'control': [12], 'architectures': [13], 'embedded': [15], 'agents.': [16], 'Unfortunately': [17], 'all': [18], 'of': [19, 24, 35, 55, 79, 109, 168], 'the': [20, 25, 53, 56, 63, 130, 158, 164], 'theory': [21], 'and': [22, 118, 161], 'much': [23], 'practice': [26], '(see': [27], 'Barto': [28], 'et': [29], 'al.,': [30], '1983,': [31], 'an': [33], 'exception)': [34], 'RL': [36, 133], 'is': [37, 58, 127, 135], 'limited': [38], 'to': [39, 62, 86, 106, 124, 137], 'Markovian': [40], 'decision': [41, 46, 81], 'processes': [42], '(MDPs).': [43], 'Many': [44], 'real-world': [45], 'tasks,': [47], 'however,': [48], 'are': [49], 'inherently': [50], 'non-Markovian,': [51], 'i.e.,': [52], 'state': [54, 110], 'environment': [57], 'only': [59, 71], 'incompletely': [60], 'known': [61], 'agent.': [65], 'In': [66], 'this': [67], 'paper': [68, 99], 'we': [69, 142], 'consider': [70], 'partially': [72], 'observable': [73], 'MDPs': [74], '(POMDPs),': [75], 'useful': [77], 'class': [78], 'non-Markovian': [80], 'processes.': [82], 'Most': [83], 'previous': [84], 'approaches': [85], 'such': [87], 'problems': [88], 'have': [89], 'combined': [90], 'computationally': [91], 'expensive': [92], 'state-estimation': [93, 150], 'techniques': [94], 'with': [95, 139], 'control.': [97], 'This': [98], 'investigates': [100], 'in': [102, 151, 157], 'POMDPs': [103, 152], 'without': [104, 149], 'resorting': [105], 'any': [107], 'form': [108], 'estimation.': [111], 'We': [112], 'present': [113], 'results': [114], 'about': [115], 'what': [116], 'TD(0)': [117], 'Q-learning': [119], 'will': [120], 'do': [121], 'when': [122], 'applied': [123], 'POMDPs.': [125, 140], 'It': [126], 'shown': [128], 'that': [129], 'conventional': [131], 'discounted': [132], 'framework': [134, 146], 'inadequate': [136], 'deal': [138], 'Finally': [141], 'develop': [143], 'new': [145], 'by': [153, 162], 'including': [154], 'stochastic': [155], 'policies': [156], 'search': [159], 'space,': [160], 'defining': [163], 'value': [165], 'or': [166], 'utility': [167], 'distribution': [170], 'over': [171], 'states.': [172]}, 'cited_by_api_url': 'https://api.openalex.org/works?filter=cites:W1541084404', 'counts_by_year': [{'year': 2024, 'cited_by_count': 4}, {'year': 2023, 'cited_by_count': 9}, {'year': 2022, 'cited_by_count': 6}, {'year': 2021, 'cited_by_count': 23}, {'year': 2020, 'cited_by_count': 17}, {'year': 2019, 'cited_by_count': 17}, {'year': 2018, 'cited_by_count': 11}, {'year': 2017, 'cited_by_count': 5}, {'year': 2016, 'cited_by_count': 11}, {'year': 2015, 'cited_by_count': 15}, {'year': 2014, 'cited_by_count': 11}, {'year': 2013, 'cited_by_count': 12}, {'year': 2012, 'cited_by_count': 14}], 'updated_date': '2024-09-18T15:23:03.217454', 'created_date': '2016-06-24'}
Publication Information

Basic Information

Access and Citation

AI Researcher Chatbot

Primary Location

Authors

Topics

Keywords

Related Works