Get quick answers to your questions about the article from our AI researcher chatbot
{'id': 'https://openalex.org/W2047295649', 'doi': 'https://doi.org/10.1162/089120103322711578', 'title': 'The Web as a Parallel Corpus', 'display_name': 'The Web as a Parallel Corpus', 'publication_year': 2003, 'publication_date': '2003-09-01', 'ids': {'openalex': 'https://openalex.org/W2047295649', 'doi': 'https://doi.org/10.1162/089120103322711578', 'mag': '2047295649'}, 'language': 'en', 'primary_location': {'is_oa': True, 'landing_page_url': 'https://doi.org/10.1162/089120103322711578', 'pdf_url': 'http://www.mitpressjournals.org/doi/pdf/10.1162/089120103322711578', 'source': {'id': 'https://openalex.org/S155526855', 'display_name': 'Computational Linguistics', 'issn_l': '0891-2017', 'issn': ['0891-2017', '1530-9312'], 'is_oa': True, 'is_in_doaj': True, 'is_core': True, 'host_organization': 'https://openalex.org/P4310320244', 'host_organization_name': 'Association for Computational Linguistics', 'host_organization_lineage': ['https://openalex.org/P4310320244'], 'host_organization_lineage_names': ['Association for Computational Linguistics'], 'type': 'journal'}, 'license': 'cc-by-nc-nd', 'license_id': 'https://openalex.org/licenses/cc-by-nc-nd', 'version': 'publishedVersion', 'is_accepted': True, 'is_published': True}, 'type': 'article', 'type_crossref': 'journal-article', 'indexed_in': ['crossref'], 'open_access': {'is_oa': True, 'oa_status': 'hybrid', 'oa_url': 'http://www.mitpressjournals.org/doi/pdf/10.1162/089120103322711578', 'any_repository_has_fulltext': False}, 'authorships': [{'author_position': 'first', 'author': {'id': 'https://openalex.org/A5059946729', 'display_name': 'Philip Resnik', 'orcid': 'https://orcid.org/0000-0002-6130-8602'}, 'institutions': [{'id': 'https://openalex.org/I66946132', 'display_name': 'University of Maryland, College Park', 'ror': 'https://ror.org/047s2c258', 'country_code': 'US', 'type': 'education', 'lineage': ['https://openalex.org/I66946132']}], 'countries': ['US'], 'is_corresponding': False, 'raw_author_name': 'Philip Resnik', 'raw_affiliation_strings': ['University of Maryland, Department of Linguistics and Institute for Advanced Computer Studies, University of Maryland, College Park, MD 20742.'], 'affiliations': [{'raw_affiliation_string': 'University of Maryland, Department of Linguistics and Institute for Advanced Computer Studies, University of Maryland, College Park, MD 20742.', 'institution_ids': ['https://openalex.org/I66946132']}]}, {'author_position': 'last', 'author': {'id': 'https://openalex.org/A5088517824', 'display_name': 'Noah A. Smith', 'orcid': 'https://orcid.org/0000-0002-2310-6380'}, 'institutions': [{'id': 'https://openalex.org/I145311948', 'display_name': 'Johns Hopkins University', 'ror': 'https://ror.org/00za53h95', 'country_code': 'US', 'type': 'education', 'lineage': ['https://openalex.org/I145311948']}], 'countries': ['US'], 'is_corresponding': False, 'raw_author_name': 'Noah A. Smith', 'raw_affiliation_strings': ['Johns Hopkins University, Department of Computer Science and Center for Language and Speech Processing, Johns Hopkins University, Baltimore, MD 21218.'], 'affiliations': [{'raw_affiliation_string': 'Johns Hopkins University, Department of Computer Science and Center for Language and Speech Processing, Johns Hopkins University, Baltimore, MD 21218.', 'institution_ids': ['https://openalex.org/I145311948']}]}], 'institution_assertions': [], 'countries_distinct_count': 1, 'institutions_distinct_count': 2, 'corresponding_author_ids': [], 'corresponding_institution_ids': [], 'apc_list': {'value': 0, 'currency': 'USD', 'value_usd': 0, 'provenance': 'doaj'}, 'apc_paid': {'value': 0, 'currency': 'USD', 'value_usd': 0, 'provenance': 'doaj'}, 'fwci': 31.232, 'has_fulltext': False, 'cited_by_count': 551, 'citation_normalized_percentile': {'value': 0.999947, 'is_in_top_1_percent': True, 'is_in_top_10_percent': True}, 'cited_by_percentile_year': {'min': 99, 'max': 100}, 'biblio': {'volume': '29', 'issue': '3', 'first_page': '349', 'last_page': '380'}, 'is_retracted': False, 'is_paratext': False, 'primary_topic': {'id': 'https://openalex.org/T10181', 'display_name': 'Natural Language Processing Techniques', 'score': 0.9998, 'subfield': {'id': 'https://openalex.org/subfields/1702', 'display_name': 'Artificial Intelligence'}, 'field': {'id': 'https://openalex.org/fields/17', 'display_name': 'Computer Science'}, 'domain': {'id': 'https://openalex.org/domains/3', 'display_name': 'Physical Sciences'}}, 'topics': [{'id': 'https://openalex.org/T10181', 'display_name': 'Natural Language Processing Techniques', 'score': 0.9998, 'subfield': {'id': 'https://openalex.org/subfields/1702', 'display_name': 'Artificial Intelligence'}, 'field': {'id': 'https://openalex.org/fields/17', 'display_name': 'Computer Science'}, 'domain': {'id': 'https://openalex.org/domains/3', 'display_name': 'Physical Sciences'}}, {'id': 'https://openalex.org/T12380', 'display_name': 'Authorship Attribution and Profiling', 'score': 0.9872, 'subfield': {'id': 'https://openalex.org/subfields/1702', 'display_name': 'Artificial Intelligence'}, 'field': {'id': 'https://openalex.org/fields/17', 'display_name': 'Computer Science'}, 'domain': {'id': 'https://openalex.org/domains/3', 'display_name': 'Physical Sciences'}}, {'id': 'https://openalex.org/T10028', 'display_name': 'Topic Modeling', 'score': 0.9856, 'subfield': {'id': 'https://openalex.org/subfields/1702', 'display_name': 'Artificial Intelligence'}, 'field': {'id': 'https://openalex.org/fields/17', 'display_name': 'Computer Science'}, 'domain': {'id': 'https://openalex.org/domains/3', 'display_name': 'Physical Sciences'}}], 'keywords': [{'id': 'https://openalex.org/keywords/parallel-corpora', 'display_name': 'Parallel corpora', 'score': 0.6034821}, {'id': 'https://openalex.org/keywords/text-corpus', 'display_name': 'Text corpus', 'score': 0.41246477}], 'concepts': [{'id': 'https://openalex.org/C41008148', 'wikidata': 'https://www.wikidata.org/wiki/Q21198', 'display_name': 'Computer science', 'level': 0, 'score': 0.8867483}, {'id': 'https://openalex.org/C110875604', 'wikidata': 'https://www.wikidata.org/wiki/Q75', 'display_name': 'The Internet', 'level': 2, 'score': 0.6416}, {'id': 'https://openalex.org/C2985367798', 'wikidata': 'https://www.wikidata.org/wiki/Q1346592', 'display_name': 'Parallel corpora', 'level': 3, 'score': 0.6034821}, {'id': 'https://openalex.org/C204321447', 'wikidata': 'https://www.wikidata.org/wiki/Q30642', 'display_name': 'Natural language processing', 'level': 1, 'score': 0.54117036}, {'id': 'https://openalex.org/C2780069185', 'wikidata': 'https://www.wikidata.org/wiki/Q7977945', 'display_name': 'Equivalence (formal languages)', 'level': 2, 'score': 0.52018}, {'id': 'https://openalex.org/C139807058', 'wikidata': 'https://www.wikidata.org/wiki/Q352374', 'display_name': 'Adaptation (eye)', 'level': 2, 'score': 0.47952414}, {'id': 'https://openalex.org/C154945302', 'wikidata': 'https://www.wikidata.org/wiki/Q11660', 'display_name': 'Artificial intelligence', 'level': 1, 'score': 0.45515674}, {'id': 'https://openalex.org/C23123220', 'wikidata': 'https://www.wikidata.org/wiki/Q816826', 'display_name': 'Information retrieval', 'level': 1, 'score': 0.4305455}, {'id': 'https://openalex.org/C177264268', 'wikidata': 'https://www.wikidata.org/wiki/Q1514741', 'display_name': 'Set (abstract data type)', 'level': 2, 'score': 0.42798194}, {'id': 'https://openalex.org/C197046077', 'wikidata': 'https://www.wikidata.org/wiki/Q785337', 'display_name': 'Web mining', 'level': 3, 'score': 0.41733503}, {'id': 'https://openalex.org/C195324797', 'wikidata': 'https://www.wikidata.org/wiki/Q33742', 'display_name': 'Natural language', 'level': 2, 'score': 0.4167649}, {'id': 'https://openalex.org/C2474386', 'wikidata': 'https://www.wikidata.org/wiki/Q461183', 'display_name': 'Text corpus', 'level': 2, 'score': 0.41246477}, {'id': 'https://openalex.org/C136764020', 'wikidata': 'https://www.wikidata.org/wiki/Q466', 'display_name': 'World Wide Web', 'level': 1, 'score': 0.3686223}, {'id': 'https://openalex.org/C203005215', 'wikidata': 'https://www.wikidata.org/wiki/Q79798', 'display_name': 'Machine translation', 'level': 2, 'score': 0.35241267}, {'id': 'https://openalex.org/C35578498', 'wikidata': 'https://www.wikidata.org/wiki/Q193424', 'display_name': 'Web service', 'level': 2, 'score': 0.2056903}, {'id': 'https://openalex.org/C199360897', 'wikidata': 'https://www.wikidata.org/wiki/Q9143', 'display_name': 'Programming language', 'level': 1, 'score': 0.08107251}, {'id': 'https://openalex.org/C41895202', 'wikidata': 'https://www.wikidata.org/wiki/Q8162', 'display_name': 'Linguistics', 'level': 1, 'score': 0.0}, {'id': 'https://openalex.org/C138885662', 'wikidata': 'https://www.wikidata.org/wiki/Q5891', 'display_name': 'Philosophy', 'level': 0, 'score': 0.0}, {'id': 'https://openalex.org/C121332964', 'wikidata': 'https://www.wikidata.org/wiki/Q413', 'display_name': 'Physics', 'level': 0, 'score': 0.0}, {'id': 'https://openalex.org/C120665830', 'wikidata': 'https://www.wikidata.org/wiki/Q14620', 'display_name': 'Optics', 'level': 1, 'score': 0.0}], 'mesh': [], 'locations_count': 1, 'locations': [{'is_oa': True, 'landing_page_url': 'https://doi.org/10.1162/089120103322711578', 'pdf_url': 'http://www.mitpressjournals.org/doi/pdf/10.1162/089120103322711578', 'source': {'id': 'https://openalex.org/S155526855', 'display_name': 'Computational Linguistics', 'issn_l': '0891-2017', 'issn': ['0891-2017', '1530-9312'], 'is_oa': True, 'is_in_doaj': True, 'is_core': True, 'host_organization': 'https://openalex.org/P4310320244', 'host_organization_name': 'Association for Computational Linguistics', 'host_organization_lineage': ['https://openalex.org/P4310320244'], 'host_organization_lineage_names': ['Association for Computational Linguistics'], 'type': 'journal'}, 'license': 'cc-by-nc-nd', 'license_id': 'https://openalex.org/licenses/cc-by-nc-nd', 'version': 'publishedVersion', 'is_accepted': True, 'is_published': True}], 'best_oa_location': {'is_oa': True, 'landing_page_url': 'https://doi.org/10.1162/089120103322711578', 'pdf_url': 'http://www.mitpressjournals.org/doi/pdf/10.1162/089120103322711578', 'source': {'id': 'https://openalex.org/S155526855', 'display_name': 'Computational Linguistics', 'issn_l': '0891-2017', 'issn': ['0891-2017', '1530-9312'], 'is_oa': True, 'is_in_doaj': True, 'is_core': True, 'host_organization': 'https://openalex.org/P4310320244', 'host_organization_name': 'Association for Computational Linguistics', 'host_organization_lineage': ['https://openalex.org/P4310320244'], 'host_organization_lineage_names': ['Association for Computational Linguistics'], 'type': 'journal'}, 'license': 'cc-by-nc-nd', 'license_id': 'https://openalex.org/licenses/cc-by-nc-nd', 'version': 'publishedVersion', 'is_accepted': True, 'is_published': True}, 'sustainable_development_goals': [{'score': 0.69, 'id': 'https://metadata.un.org/sdg/4', 'display_name': 'Quality education'}], 'grants': [], 'datasets': [], 'versions': [], 'referenced_works_count': 41, 'referenced_works': ['https://openalex.org/W1496351660', 'https://openalex.org/W1502581825', 'https://openalex.org/W1533946607', 'https://openalex.org/W1554769004', 'https://openalex.org/W157432847', 'https://openalex.org/W1581740421', 'https://openalex.org/W1588612820', 'https://openalex.org/W1608419360', 'https://openalex.org/W1636405317', 'https://openalex.org/W1647729745', 'https://openalex.org/W1969178697', 'https://openalex.org/W1977545325', 'https://openalex.org/W1995875735', 'https://openalex.org/W200075660', 'https://openalex.org/W2007709031', 'https://openalex.org/W2012268403', 'https://openalex.org/W2016630033', 'https://openalex.org/W2032773168', 'https://openalex.org/W2041404167', 'https://openalex.org/W2048679005', 'https://openalex.org/W2079442239', 'https://openalex.org/W2097333193', 'https://openalex.org/W2101210369', 'https://openalex.org/W2107330157', 'https://openalex.org/W2108997961', 'https://openalex.org/W2114930830', 'https://openalex.org/W2132957691', 'https://openalex.org/W2137854946', 'https://openalex.org/W2138753018', 'https://openalex.org/W2141325213', 'https://openalex.org/W2144810223', 'https://openalex.org/W2145080939', 'https://openalex.org/W2152565070', 'https://openalex.org/W2154124206', 'https://openalex.org/W2154384676', 'https://openalex.org/W2172167844', 'https://openalex.org/W2883783597', 'https://openalex.org/W3104029765', 'https://openalex.org/W4241850027', 'https://openalex.org/W4249159365', 'https://openalex.org/W4285719527'], 'related_works': ['https://openalex.org/W4307459710', 'https://openalex.org/W3175595715', 'https://openalex.org/W3155572818', 'https://openalex.org/W3152052241', 'https://openalex.org/W3108641831', 'https://openalex.org/W2986030184', 'https://openalex.org/W2985215540', 'https://openalex.org/W2786253471', 'https://openalex.org/W2604275745', 'https://openalex.org/W2104907655'], 'abstract_inverted_index': {'Parallel': [0], 'corpora': [1], 'have': [2], 'become': [3], 'an': [4], 'essential': [5], 'resource': [6], 'for': [7, 26, 87, 114], 'work': [8, 21], 'in': [9, 106], 'multilingual': [10], 'natural': [11], 'language': [12, 117], 'processing.': [13], 'In': [14], 'this': [15], 'article,': [16], 'we': [17], 'report': [18], 'on': [19, 30, 59, 94], 'our': [20], 'using': [22], 'the': [23, 31, 37, 53, 78, 84, 92, 99, 107], 'STRAND': [24], 'system': [25, 79], 'mining': [27, 88], 'parallel': [28, 89, 112], 'text': [29, 90], 'World': [32], 'Wide': [33], 'Web,': [34], 'first': [35], 'reviewing': [36], 'original': [38], 'algorithm': [39], 'and': [40, 42, 75], 'results': [41], 'then': [43], 'presenting': [44], 'a': [45, 68, 95, 110, 115], 'set': [46], 'of': [47, 55, 62, 72, 77, 83, 101, 109], 'significant': [48, 111], 'enhancements.': [49], 'These': [50], 'enhancements': [51], 'include': [52], 'use': [54], 'supervised': [56], 'learning': [57], 'based': [58], 'structural': [60], 'features': [61], 'documents': [63], 'to': [64, 80], 'improve': [65], 'classification': [66], 'performance,': [67], 'new': [69], 'content-based': [70], 'measure': [71], 'translational': [73], 'equivalence,': [74], 'adaptation': [76], 'take': [81], 'advantage': [82], 'Internet': [85], 'Archive': [86], 'from': [91], 'Web': [93], 'large': [96], 'scale.': [97], 'Finally,': [98], 'value': [100], 'these': [102], 'techniques': [103], 'is': [104], 'demonstrated': [105], 'construction': [108], 'corpus': [113], 'low-density': [116], 'pair.': [118]}, 'cited_by_api_url': 'https://api.openalex.org/works?filter=cites:W2047295649', 'counts_by_year': [{'year': 2024, 'cited_by_count': 1}, {'year': 2023, 'cited_by_count': 6}, {'year': 2022, 'cited_by_count': 8}, {'year': 2021, 'cited_by_count': 22}, {'year': 2020, 'cited_by_count': 26}, {'year': 2019, 'cited_by_count': 23}, {'year': 2018, 'cited_by_count': 16}, {'year': 2017, 'cited_by_count': 21}, {'year': 2016, 'cited_by_count': 29}, {'year': 2015, 'cited_by_count': 20}, {'year': 2014, 'cited_by_count': 36}, {'year': 2013, 'cited_by_count': 30}, {'year': 2012, 'cited_by_count': 45}], 'updated_date': '2025-01-08T22:15:00.849528', 'created_date': '2016-06-24'}