diff --git a/CHANGELOG.md b/CHANGELOG.md index 4fc8388..56406e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,12 @@ # Changelog All notable changes to this project will be documented in this file. +## v0.0.5 2023-12-28 +### Improvements +- Add Published, ArxivID, SourceBank field in Article + + +### Bug Fixes ## v0.0.4 2023-10-14 ### Improvements @@ -8,7 +14,6 @@ All notable changes to this project will be documented in this file. - Add FlagAffiliationMining_TITIPATA from Api - Add ParseMethod field in Affiliation - ### Bug Fixes - Fix go_affiliation_mining 2023-12-25 - Fix `E501` line too long @@ -34,7 +39,6 @@ All notable changes to this project will be documented in this file. - Add Client Topic Extraction - Change Topics (list[str] to list[dict]) - ### Bug Fixes - Fix Slow Request by session - Fix Github Action @@ -82,7 +86,6 @@ All notable changes to this project will be documented in this file. - Fix proccess_bar in export ## v0.0.1 - 2023-02-05 - ### Improvements - Build Dockerfile - Start flake8 activity 2023-03-05 diff --git a/README.md b/README.md index a03a62c..1990f56 100644 --- a/README.md +++ b/README.md @@ -162,7 +162,7 @@ get list of PMID in state 0 and save to file for debugginf use outfile.write(data1) ``` -open befor file for debuging use +open before file for debugging use ```python f = open('sample1.json') data = json.load(f) @@ -231,15 +231,15 @@ The preparation of the article for extracting the graph has different steps that *List of state number* -|State|Description| -|-|-| -|0|article identifier saved| -|1|article details article info saved (json Form)| -|2|parse details info| -|3|Get Citation| - -|-1|Error| +|State|Short Description|Description| +|-----|-----------------|-----------| +|0 |article identifier saved|At this stage, the article object stored in the data bank has only one identifier, such as the PMID or DOI identifier| +|1 |article details article info saved (json Form)|Metadata related to the article is stored in the 
`OreginalArticle` field from the `SourceBank`, but it has not been parsed yet| +|2 |parse details info|| +|3 |Get Citation|| + +|-1 |Error|if an error happened while moving from state 1 to 2| There are two ways to run a pipeline. In the first method, we give the number of the existing state and all the articles in this state move forward one state. diff --git a/database/Arxiv_test.json b/database/Arxiv_test.json new file mode 100644 index 0000000..4a1b0bb --- /dev/null +++ b/database/Arxiv_test.json @@ -0,0 +1,1567 @@ +{ + "_default": { + "1": { + "Abstract": "Integrating large language models (LLMs) into healthcare presents potential but faces challenges. Directly pre-training LLMs for domains like medicine is resource-heavy and sometimes unfeasible. Sole reliance on Supervised Fine-tuning (SFT) can result in overconfident predictions and may not tap into domain specific insights. Addressing these challenges, we present a multi-stage training method combining Domain-specific Continued Pre-training (DCPT), SFT, and Direct Preference Optimization (DPO). A notable contribution of our study is the introduction of a 3Gb Chinese Medicine (ChiMed) dataset, encompassing medical question answering, plain texts, knowledge graphs, and dialogues, segmented into three training stages. The medical LLM trained with our pipeline, Qilin-Med, exhibits significant performance boosts. In the CPT and SFT phases, it achieves 38.4% and 40.0% accuracy on the CMExam, surpassing Baichuan-7B's 33.5%. In the DPO phase, on the Huatuo-26M test set, it scores 16.66 in BLEU-1 and 27.44 in ROUGE1, outperforming the SFT's 12.69 and 24.21. 
This highlights the strength of our training approach in refining LLMs for medical applications.", + "ArxivID": "2310.09089v1", + "Authors": [ + { + "Affiliations": null, + "ForeName": null, + "FullName": "Qichen Ye", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Junling Liu", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Dading Chong", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Peilin Zhou", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Yining Hua", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Andrew Liu", + "HashID": null, + "LastName": null, + "ORCID": null + } + ], + "CiteCrawlerDeep": 0, + "CitedBy": null, + "DOI": null, + "FlagAffiliationMining": null, + "FlagExtractKG": null, + "FlagExtractTopic": null, + "InsertType": null, + "Journal": "Arxiv", + "Keywords": null, + "NamedEntities": null, + "OreginalArticle": { + "arxiv:primary_category": { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.CL", + "@xmlns:arxiv": "http://arxiv.org/schemas/atom" + }, + "author": [ + { + "name": "Qichen Ye" + }, + { + "name": "Junling Liu" + }, + { + "name": "Dading Chong" + }, + { + "name": "Peilin Zhou" + }, + { + "name": "Yining Hua" + }, + { + "name": "Andrew Liu" + } + ], + "category": { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.CL" + }, + "id": "http://arxiv.org/abs/2310.09089v1", + "link": [ + { + "@href": "http://arxiv.org/abs/2310.09089v1", + "@rel": "alternate", + "@type": "text/html" + }, + { + "@href": "http://arxiv.org/pdf/2310.09089v1", + "@rel": "related", + "@title": "pdf", + "@type": "application/pdf" + } + ], + "published": "2023-10-13T13:17:03Z", + 
"summary": "Integrating large language models (LLMs) into healthcare presents potential\nbut faces challenges. Directly pre-training LLMs for domains like medicine is\nresource-heavy and sometimes unfeasible. Sole reliance on Supervised\nFine-tuning (SFT) can result in overconfident predictions and may not tap into\ndomain specific insights. Addressing these challenges, we present a multi-stage\ntraining method combining Domain-specific Continued Pre-training (DCPT), SFT,\nand Direct Preference Optimization (DPO). A notable contribution of our study\nis the introduction of a 3Gb Chinese Medicine (ChiMed) dataset, encompassing\nmedical question answering, plain texts, knowledge graphs, and dialogues,\nsegmented into three training stages. The medical LLM trained with our\npipeline, Qilin-Med, exhibits significant performance boosts. In the CPT and\nSFT phases, it achieves 38.4% and 40.0% accuracy on the CMExam, surpassing\nBaichuan-7B's 33.5%. In the DPO phase, on the Huatuo-26M test set, it scores\n16.66 in BLEU-1 and 27.44 in ROUGE1, outperforming the SFT's 12.69 and 24.21.\nThis highlights the strength of our training approach in refining LLMs for\nmedical applications.", + "title": "Qilin-Med: Multi-stage Knowledge Injection Advanced Medical Large\n Language Model", + "updated": "2023-10-13T13:17:03Z" + }, + "PMC": null, + "PMID": null, + "Published": "2023-10-13T13:17:03Z", + "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20OR%20abs%3A%22medical%22&id_list=&start=20&max_results=10", + "ReferenceCrawlerDeep": 0, + "References": null, + "SourceBank": 2, + "State": 2, + "Title": "Qilin-Med: Multi-stage Knowledge Injection Advanced Medical Large Language Model", + "Topics": null + }, + "2": { + "Abstract": "Few-shot learning has been studied to adapt models to tasks with very few samples. It holds profound significance, particularly in clinical tasks, due to the high annotation cost of medical images. 
Several works have explored few-shot learning on medical images, yet they still require a large number of medical images for pre-training models to gain domain-specific priors. Vision foundation models recently have achieved remarkable success in natural images. Hence, adapting rapidly advancing vision foundation models from natural images to few-shot clinical tasks holds great promise. MedFMC has recently organized a challenge to shed more light on this topic at NeurIPS 2023. In this work, we present our challenge solution. We observe that a simple variant of fine-tuning with partial freezing shows remarkable performance. Empirical evidence demonstrates that this approach could outperform various common fine-tuning methods under limited sample sizes. Additionally, we explore enhanced utilization of semantic supervision to boost performance. We propose a novel approach that contextualizes labels via large language models (LLMs). Our findings reveal that the context generated by LLMs significantly enhances the discrimination of semantic embeddings for similar categories, resulting in a notable performance improvement of 3%-5% in 1-shot settings compared to commonly employed one-hot labels and other semantic supervision methods. 
Our solution secures the 1st place in the MedFMC challenge.", + "ArxivID": "2312.07125v1", + "Authors": [ + { + "Affiliations": null, + "ForeName": null, + "FullName": "Kaipeng Zheng", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Weiran Huang", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Lichao Sun", + "HashID": null, + "LastName": null, + "ORCID": null + } + ], + "CiteCrawlerDeep": 0, + "CitedBy": null, + "DOI": null, + "FlagAffiliationMining": null, + "FlagExtractKG": null, + "FlagExtractTopic": null, + "InsertType": null, + "Journal": "Arxiv", + "Keywords": null, + "NamedEntities": null, + "OreginalArticle": { + "arxiv:primary_category": { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.CV", + "@xmlns:arxiv": "http://arxiv.org/schemas/atom" + }, + "author": [ + { + "name": "Kaipeng Zheng" + }, + { + "name": "Weiran Huang" + }, + { + "name": "Lichao Sun" + } + ], + "category": { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.CV" + }, + "id": "http://arxiv.org/abs/2312.07125v1", + "link": [ + { + "@href": "http://arxiv.org/abs/2312.07125v1", + "@rel": "alternate", + "@type": "text/html" + }, + { + "@href": "http://arxiv.org/pdf/2312.07125v1", + "@rel": "related", + "@title": "pdf", + "@type": "application/pdf" + } + ], + "published": "2023-12-12T09:58:07Z", + "summary": "Few-shot learning has been studied to adapt models to tasks with very few\nsamples. It holds profound significance, particularly in clinical tasks, due to\nthe high annotation cost of medical images. Several works have explored\nfew-shot learning on medical images, yet they still require a large number of\nmedical images for pre-training models to gain domain-specific priors. 
Vision\nfoundation models recently have achieved remarkable success in natural images.\nHence, adapting rapidly advancing vision foundation models from natural images\nto few-shot clinical tasks holds great promise. MedFMC has recently organized a\nchallenge to shed more light on this topic at NeurIPS 2023. In this work, we\npresent our challenge solution. We observe that a simple variant of fine-tuning\nwith partial freezing shows remarkable performance. Empirical evidence\ndemonstrates that this approach could outperform various common fine-tuning\nmethods under limited sample sizes. Additionally, we explore enhanced\nutilization of semantic supervision to boost performance. We propose a novel\napproach that contextualizes labels via large language models (LLMs). Our\nfindings reveal that the context generated by LLMs significantly enhances the\ndiscrimination of semantic embeddings for similar categories, resulting in a\nnotable performance improvement of 3%-5% in 1-shot settings compared to\ncommonly employed one-hot labels and other semantic supervision methods. Our\nsolution secures the 1st place in the MedFMC challenge.", + "title": "Efficient Few-Shot Clinical Task Adaptation with Large Language Models", + "updated": "2023-12-12T09:58:07Z" + }, + "PMC": null, + "PMID": null, + "Published": "2023-12-12T09:58:07Z", + "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20OR%20abs%3A%22medical%22&id_list=&start=20&max_results=10", + "ReferenceCrawlerDeep": 0, + "References": null, + "SourceBank": 2, + "State": 2, + "Title": "Efficient Few-Shot Clinical Task Adaptation with Large Language Models", + "Topics": null + }, + "3": { + "Abstract": "An accurate differential diagnosis (DDx) is a cornerstone of medical care, often reached through an iterative process of interpretation that combines clinical history, physical examination, investigations and procedures. 
Interactive interfaces powered by Large Language Models (LLMs) present new opportunities to both assist and automate aspects of this process. In this study, we introduce an LLM optimized for diagnostic reasoning, and evaluate its ability to generate a DDx alone or as an aid to clinicians. 20 clinicians evaluated 302 challenging, real-world medical cases sourced from the New England Journal of Medicine (NEJM) case reports. Each case report was read by two clinicians, who were randomized to one of two assistive conditions: either assistance from search engines and standard medical resources, or LLM assistance in addition to these tools. All clinicians provided a baseline, unassisted DDx prior to using the respective assistive tools. Our LLM for DDx exhibited standalone performance that exceeded that of unassisted clinicians (top-10 accuracy 59.1% vs 33.6%, [p = 0.04]). Comparing the two assisted study arms, the DDx quality score was higher for clinicians assisted by our LLM (top-10 accuracy 51.7%) compared to clinicians without its assistance (36.1%) (McNemar's Test: 45.7, p < 0.01) and clinicians with search (44.4%) (4.75, p = 0.03). Further, clinicians assisted by our LLM arrived at more comprehensive differential lists than those without its assistance. 
Our study suggests that our LLM for DDx has potential to improve clinicians' diagnostic reasoning and accuracy in challenging cases, meriting further real-world evaluation for its ability to empower physicians and widen patients' access to specialist-level expertise.", + "ArxivID": "2312.00164v1", + "Authors": [ + { + "Affiliations": null, + "ForeName": null, + "FullName": "Daniel McDuff", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Mike Schaekermann", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Tao Tu", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Anil Palepu", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Amy Wang", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Jake Garrison", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Karan Singhal", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Yash Sharma", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Shekoofeh Azizi", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Kavita Kulkarni", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Le Hou", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Yong Cheng", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, 
+ "FullName": "Yun Liu", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "S Sara Mahdavi", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Sushant Prakash", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Anupam Pathak", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Christopher Semturs", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Shwetak Patel", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Dale R Webster", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Ewa Dominowska", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Juraj Gottweis", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Joelle Barral", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Katherine Chou", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Greg S Corrado", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Yossi Matias", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Jake Sunshine", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Alan Karthikesalingam", + "HashID": null, + 
"LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Vivek Natarajan", + "HashID": null, + "LastName": null, + "ORCID": null + } + ], + "CiteCrawlerDeep": 0, + "CitedBy": null, + "DOI": null, + "FlagAffiliationMining": null, + "FlagExtractKG": null, + "FlagExtractTopic": null, + "InsertType": null, + "Journal": "Arxiv", + "Keywords": null, + "NamedEntities": null, + "OreginalArticle": { + "arxiv:primary_category": { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.CY", + "@xmlns:arxiv": "http://arxiv.org/schemas/atom" + }, + "author": [ + { + "name": "Daniel McDuff" + }, + { + "name": "Mike Schaekermann" + }, + { + "name": "Tao Tu" + }, + { + "name": "Anil Palepu" + }, + { + "name": "Amy Wang" + }, + { + "name": "Jake Garrison" + }, + { + "name": "Karan Singhal" + }, + { + "name": "Yash Sharma" + }, + { + "name": "Shekoofeh Azizi" + }, + { + "name": "Kavita Kulkarni" + }, + { + "name": "Le Hou" + }, + { + "name": "Yong Cheng" + }, + { + "name": "Yun Liu" + }, + { + "name": "S Sara Mahdavi" + }, + { + "name": "Sushant Prakash" + }, + { + "name": "Anupam Pathak" + }, + { + "name": "Christopher Semturs" + }, + { + "name": "Shwetak Patel" + }, + { + "name": "Dale R Webster" + }, + { + "name": "Ewa Dominowska" + }, + { + "name": "Juraj Gottweis" + }, + { + "name": "Joelle Barral" + }, + { + "name": "Katherine Chou" + }, + { + "name": "Greg S Corrado" + }, + { + "name": "Yossi Matias" + }, + { + "name": "Jake Sunshine" + }, + { + "name": "Alan Karthikesalingam" + }, + { + "name": "Vivek Natarajan" + } + ], + "category": [ + { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.CY" + }, + { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.AI" + } + ], + "id": "http://arxiv.org/abs/2312.00164v1", + "link": [ + { + "@href": "http://arxiv.org/abs/2312.00164v1", + "@rel": "alternate", + "@type": "text/html" + }, + { + "@href": "http://arxiv.org/pdf/2312.00164v1", + "@rel": "related", + "@title": 
"pdf", + "@type": "application/pdf" + } + ], + "published": "2023-11-30T19:55:51Z", + "summary": "An accurate differential diagnosis (DDx) is a cornerstone of medical care,\noften reached through an iterative process of interpretation that combines\nclinical history, physical examination, investigations and procedures.\nInteractive interfaces powered by Large Language Models (LLMs) present new\nopportunities to both assist and automate aspects of this process. In this\nstudy, we introduce an LLM optimized for diagnostic reasoning, and evaluate its\nability to generate a DDx alone or as an aid to clinicians. 20 clinicians\nevaluated 302 challenging, real-world medical cases sourced from the New\nEngland Journal of Medicine (NEJM) case reports. Each case report was read by\ntwo clinicians, who were randomized to one of two assistive conditions: either\nassistance from search engines and standard medical resources, or LLM\nassistance in addition to these tools. All clinicians provided a baseline,\nunassisted DDx prior to using the respective assistive tools. Our LLM for DDx\nexhibited standalone performance that exceeded that of unassisted clinicians\n(top-10 accuracy 59.1% vs 33.6%, [p = 0.04]). Comparing the two assisted study\narms, the DDx quality score was higher for clinicians assisted by our LLM\n(top-10 accuracy 51.7%) compared to clinicians without its assistance (36.1%)\n(McNemar's Test: 45.7, p < 0.01) and clinicians with search (44.4%) (4.75, p =\n0.03). Further, clinicians assisted by our LLM arrived at more comprehensive\ndifferential lists than those without its assistance. 
Our study suggests that\nour LLM for DDx has potential to improve clinicians' diagnostic reasoning and\naccuracy in challenging cases, meriting further real-world evaluation for its\nability to empower physicians and widen patients' access to specialist-level\nexpertise.", + "title": "Towards Accurate Differential Diagnosis with Large Language Models", + "updated": "2023-11-30T19:55:51Z" + }, + "PMC": null, + "PMID": null, + "Published": "2023-11-30T19:55:51Z", + "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20OR%20abs%3A%22medical%22&id_list=&start=20&max_results=10", + "ReferenceCrawlerDeep": 0, + "References": null, + "SourceBank": 2, + "State": 2, + "Title": "Towards Accurate Differential Diagnosis with Large Language Models", + "Topics": null + }, + "4": { + "Abstract": "Large language models (LLMs) have been applied to tasks in healthcare, ranging from medical exam questions to responding to patient questions. With increasing institutional partnerships between companies producing LLMs and healthcare systems, real world clinical application is coming closer to reality. As these models gain traction, it is essential for healthcare practitioners to understand what LLMs are, their development, their current and potential applications, and the associated pitfalls when utilized in medicine. This review and accompanying tutorial aim to give an overview of these topics to aid healthcare practitioners in understanding the rapidly changing landscape of LLMs as applied to medicine.", + "ArxivID": "2309.00087v1", + "Authors": [ + { + "Affiliations": null, + "ForeName": null, + "FullName": "Jesutofunmi A. Omiye", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Haiwen Gui", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Shawheen J. 
Rezaei", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "James Zou", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Roxana Daneshjou", + "HashID": null, + "LastName": null, + "ORCID": null + } + ], + "CiteCrawlerDeep": 0, + "CitedBy": null, + "DOI": null, + "FlagAffiliationMining": null, + "FlagExtractKG": null, + "FlagExtractTopic": null, + "InsertType": null, + "Journal": "Arxiv", + "Keywords": null, + "NamedEntities": null, + "OreginalArticle": { + "arxiv:primary_category": { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.CL", + "@xmlns:arxiv": "http://arxiv.org/schemas/atom" + }, + "author": [ + { + "name": "Jesutofunmi A. Omiye" + }, + { + "name": "Haiwen Gui" + }, + { + "name": "Shawheen J. Rezaei" + }, + { + "name": "James Zou" + }, + { + "name": "Roxana Daneshjou" + } + ], + "category": [ + { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.CL" + }, + { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.AI" + }, + { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.CY" + } + ], + "id": "http://arxiv.org/abs/2309.00087v1", + "link": [ + { + "@href": "http://arxiv.org/abs/2309.00087v1", + "@rel": "alternate", + "@type": "text/html" + }, + { + "@href": "http://arxiv.org/pdf/2309.00087v1", + "@rel": "related", + "@title": "pdf", + "@type": "application/pdf" + } + ], + "published": "2023-08-31T19:06:39Z", + "summary": "Large language models (LLMs) have been applied to tasks in healthcare,\nranging from medical exam questions to responding to patient questions. With\nincreasing institutional partnerships between companies producing LLMs and\nhealthcare systems, real world clinical application is coming closer to\nreality. 
As these models gain traction, it is essential for healthcare\npractitioners to understand what LLMs are, their development, their current and\npotential applications, and the associated pitfalls when utilized in medicine.\nThis review and accompanying tutorial aim to give an overview of these topics\nto aid healthcare practitioners in understanding the rapidly changing landscape\nof LLMs as applied to medicine.", + "title": "Large language models in medicine: the potentials and pitfalls", + "updated": "2023-08-31T19:06:39Z" + }, + "PMC": null, + "PMID": null, + "Published": "2023-08-31T19:06:39Z", + "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20OR%20abs%3A%22medical%22&id_list=&start=20&max_results=10", + "ReferenceCrawlerDeep": 0, + "References": null, + "SourceBank": 2, + "State": 2, + "Title": "Large language models in medicine: the potentials and pitfalls", + "Topics": null + }, + "5": { + "Abstract": "A knowledge gap persists between machine learning (ML) developers (e.g., data scientists) and practitioners (e.g., clinicians), hampering the full utilization of ML for clinical data analysis. We investigated the potential of the ChatGPT Advanced Data Analysis (ADA), an extension of GPT-4, to bridge this gap and perform ML analyses efficiently. Real-world clinical datasets and study details from large trials across various medical specialties were presented to ChatGPT ADA without specific guidance. ChatGPT ADA autonomously developed state-of-the-art ML models based on the original study's training data to predict clinical outcomes such as cancer development, cancer progression, disease complications, or biomarkers such as pathogenic gene sequences. Following the re-implementation and optimization of the published models, the head-to-head comparison of the ChatGPT ADA-crafted ML models and their respective manually crafted counterparts revealed no significant differences in traditional performance metrics (P>0.474). 
Strikingly, the ChatGPT ADA-crafted ML models often outperformed their counterparts. In conclusion, ChatGPT ADA offers a promising avenue to democratize ML in medicine by simplifying complex data analyses, yet should enhance, not replace, specialized training and resources, to promote broader applications in medical research and practice.", + "ArxivID": "2308.14120v3", + "Authors": [ + { + "Affiliations": null, + "ForeName": null, + "FullName": "Soroosh Tayebi Arasteh", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Tianyu Han", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Mahshad Lotfinia", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Christiane Kuhl", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Jakob Nikolas Kather", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Daniel Truhn", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Sven Nebelung", + "HashID": null, + "LastName": null, + "ORCID": null + } + ], + "CiteCrawlerDeep": 0, + "CitedBy": null, + "DOI": null, + "FlagAffiliationMining": null, + "FlagExtractKG": null, + "FlagExtractTopic": null, + "InsertType": null, + "Journal": "Arxiv", + "Keywords": null, + "NamedEntities": null, + "OreginalArticle": { + "arxiv:primary_category": { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.LG", + "@xmlns:arxiv": "http://arxiv.org/schemas/atom" + }, + "author": [ + { + "name": "Soroosh Tayebi Arasteh" + }, + { + "name": "Tianyu Han" + }, + { + "name": "Mahshad Lotfinia" + }, + { + "name": "Christiane Kuhl" + }, + { + "name": "Jakob Nikolas Kather" + }, + { + 
"name": "Daniel Truhn" + }, + { + "name": "Sven Nebelung" + } + ], + "category": [ + { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.LG" + }, + { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.AI" + }, + { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.CL" + } + ], + "id": "http://arxiv.org/abs/2308.14120v3", + "link": [ + { + "@href": "http://arxiv.org/abs/2308.14120v3", + "@rel": "alternate", + "@type": "text/html" + }, + { + "@href": "http://arxiv.org/pdf/2308.14120v3", + "@rel": "related", + "@title": "pdf", + "@type": "application/pdf" + } + ], + "published": "2023-08-27T14:28:38Z", + "summary": "A knowledge gap persists between machine learning (ML) developers (e.g., data\nscientists) and practitioners (e.g., clinicians), hampering the full\nutilization of ML for clinical data analysis. We investigated the potential of\nthe ChatGPT Advanced Data Analysis (ADA), an extension of GPT-4, to bridge this\ngap and perform ML analyses efficiently. Real-world clinical datasets and study\ndetails from large trials across various medical specialties were presented to\nChatGPT ADA without specific guidance. ChatGPT ADA autonomously developed\nstate-of-the-art ML models based on the original study's training data to\npredict clinical outcomes such as cancer development, cancer progression,\ndisease complications, or biomarkers such as pathogenic gene sequences.\nFollowing the re-implementation and optimization of the published models, the\nhead-to-head comparison of the ChatGPT ADA-crafted ML models and their\nrespective manually crafted counterparts revealed no significant differences in\ntraditional performance metrics (P>0.474). Strikingly, the ChatGPT ADA-crafted\nML models often outperformed their counterparts. 
In conclusion, ChatGPT ADA\noffers a promising avenue to democratize ML in medicine by simplifying complex\ndata analyses, yet should enhance, not replace, specialized training and\nresources, to promote broader applications in medical research and practice.", + "title": "Large Language Models Streamline Automated Machine Learning for Clinical\n Studies", + "updated": "2023-10-09T18:01:12Z" + }, + "PMC": null, + "PMID": null, + "Published": "2023-08-27T14:28:38Z", + "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20OR%20abs%3A%22medical%22&id_list=&start=20&max_results=10", + "ReferenceCrawlerDeep": 0, + "References": null, + "SourceBank": 2, + "State": 2, + "Title": "Large Language Models Streamline Automated Machine Learning for Clinical Studies", + "Topics": null + }, + "6": { + "Abstract": "Large language models (LLMs) have shown promise for generative and knowledge-intensive tasks including question-answering (QA) tasks. However, the practical deployment still faces challenges, notably the issue of \"hallucination\", where models generate plausible-sounding but unfaithful or nonsensical information. This issue becomes particularly critical in the medical domain due to the uncommon professional concepts and potential social risks involved. This paper analyses the phenomenon of hallucination in medical generative QA systems using widely adopted LLMs and datasets. Our investigation centers on the identification and comprehension of common problematic answers, with a specific emphasis on hallucination. To tackle this challenge, we present an interactive self-reflection methodology that incorporates knowledge acquisition and answer generation. Through this feedback process, our approach steadily enhances the factuality, consistency, and entailment of the generated answers. Consequently, we harness the interactivity and multitasking ability of LLMs and produce progressively more precise and accurate answers. 
Experimental results on both automatic and human evaluation demonstrate the superiority of our approach in hallucination reduction compared to baselines.", + "ArxivID": "2310.06271v1", + "Authors": [ + { + "Affiliations": null, + "ForeName": null, + "FullName": "Ziwei Ji", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Tiezheng Yu", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Yan Xu", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Nayeon Lee", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Etsuko Ishii", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Pascale Fung", + "HashID": null, + "LastName": null, + "ORCID": null + } + ], + "CiteCrawlerDeep": 0, + "CitedBy": null, + "DOI": null, + "FlagAffiliationMining": null, + "FlagExtractKG": null, + "FlagExtractTopic": null, + "InsertType": null, + "Journal": "Arxiv", + "Keywords": null, + "NamedEntities": null, + "OreginalArticle": { + "arxiv:comment": { + "#text": "Accepted by the findings of EMNLP 2023", + "@xmlns:arxiv": "http://arxiv.org/schemas/atom" + }, + "arxiv:primary_category": { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.CL", + "@xmlns:arxiv": "http://arxiv.org/schemas/atom" + }, + "author": [ + { + "name": "Ziwei Ji" + }, + { + "name": "Tiezheng Yu" + }, + { + "name": "Yan Xu" + }, + { + "name": "Nayeon Lee" + }, + { + "name": "Etsuko Ishii" + }, + { + "name": "Pascale Fung" + } + ], + "category": [ + { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.CL" + }, + { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.AI" + } + ], + "id": "http://arxiv.org/abs/2310.06271v1", + "link": [ + { + "@href": 
"http://arxiv.org/abs/2310.06271v1", + "@rel": "alternate", + "@type": "text/html" + }, + { + "@href": "http://arxiv.org/pdf/2310.06271v1", + "@rel": "related", + "@title": "pdf", + "@type": "application/pdf" + } + ], + "published": "2023-10-10T03:05:44Z", + "summary": "Large language models (LLMs) have shown promise for generative and\nknowledge-intensive tasks including question-answering (QA) tasks. However, the\npractical deployment still faces challenges, notably the issue of\n\"hallucination\", where models generate plausible-sounding but unfaithful or\nnonsensical information. This issue becomes particularly critical in the\nmedical domain due to the uncommon professional concepts and potential social\nrisks involved. This paper analyses the phenomenon of hallucination in medical\ngenerative QA systems using widely adopted LLMs and datasets. Our investigation\ncenters on the identification and comprehension of common problematic answers,\nwith a specific emphasis on hallucination. To tackle this challenge, we present\nan interactive self-reflection methodology that incorporates knowledge\nacquisition and answer generation. Through this feedback process, our approach\nsteadily enhances the factuality, consistency, and entailment of the generated\nanswers. Consequently, we harness the interactivity and multitasking ability of\nLLMs and produce progressively more precise and accurate answers. 
Experimental\nresults on both automatic and human evaluation demonstrate the superiority of\nour approach in hallucination reduction compared to baselines.", + "title": "Towards Mitigating Hallucination in Large Language Models via\n Self-Reflection", + "updated": "2023-10-10T03:05:44Z" + }, + "PMC": null, + "PMID": null, + "Published": "2023-10-10T03:05:44Z", + "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20OR%20abs%3A%22medical%22&id_list=&start=20&max_results=10", + "ReferenceCrawlerDeep": 0, + "References": null, + "SourceBank": 2, + "State": 2, + "Title": "Towards Mitigating Hallucination in Large Language Models via Self-Reflection", + "Topics": null + }, + "7": { + "Abstract": "Although large language models (LLMs) often produce impressive outputs, it remains unclear how they perform in real-world scenarios requiring strong reasoning skills and expert domain knowledge. We set out to investigate whether close- and open-source models (GPT-3.5, LLama-2, etc.) can be applied to answer and reason about difficult real-world-based questions. We focus on three popular medical benchmarks (MedQA-USMLE, MedMCQA, and PubMedQA) and multiple prompting scenarios: Chain-of-Thought (CoT, think step-by-step), few-shot and retrieval augmentation. Based on an expert annotation of the generated CoTs, we found that InstructGPT can often read, reason and recall expert knowledge. Last, by leveraging advances in prompt engineering (few-shot and ensemble methods), we demonstrated that GPT-3.5 not only yields calibrated predictive distributions, but also reaches the passing score on three datasets: MedQA-USMLE 60.2%, MedMCQA 62.7% and PubMedQA 78.2%. 
Open-source models are closing the gap: Llama-2 70B also passed the MedQA-USMLE with 62.5% accuracy.", + "ArxivID": "2207.08143v4", + "Authors": [ + { + "Affiliations": null, + "ForeName": null, + "FullName": "Valentin Li\u00e9vin", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Christoffer Egeberg Hother", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Andreas Geert Motzfeldt", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Ole Winther", + "HashID": null, + "LastName": null, + "ORCID": null + } + ], + "CiteCrawlerDeep": 0, + "CitedBy": null, + "DOI": null, + "FlagAffiliationMining": null, + "FlagExtractKG": null, + "FlagExtractTopic": null, + "InsertType": null, + "Journal": "Arxiv", + "Keywords": null, + "NamedEntities": null, + "OreginalArticle": { + "arxiv:comment": { + "#text": "37 pages, 23 figures. 
v1: results using InstructGPT, v2.0: added the\n Codex experiments, v2.1: added the missing test MedMCQA results for Codex\n 5-shot CoT and using k=100 samples, v3.0: added results for open source\n models -- ready for publication (final version)", + "@xmlns:arxiv": "http://arxiv.org/schemas/atom" + }, + "arxiv:primary_category": { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.CL", + "@xmlns:arxiv": "http://arxiv.org/schemas/atom" + }, + "author": [ + { + "name": "Valentin Li\u00e9vin" + }, + { + "name": "Christoffer Egeberg Hother" + }, + { + "name": "Andreas Geert Motzfeldt" + }, + { + "name": "Ole Winther" + } + ], + "category": [ + { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.CL" + }, + { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.AI" + }, + { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.LG" + }, + { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "I.2.1; I.2.7" + } + ], + "id": "http://arxiv.org/abs/2207.08143v4", + "link": [ + { + "@href": "http://arxiv.org/abs/2207.08143v4", + "@rel": "alternate", + "@type": "text/html" + }, + { + "@href": "http://arxiv.org/pdf/2207.08143v4", + "@rel": "related", + "@title": "pdf", + "@type": "application/pdf" + } + ], + "published": "2022-07-17T11:24:44Z", + "summary": "Although large language models (LLMs) often produce impressive outputs, it\nremains unclear how they perform in real-world scenarios requiring strong\nreasoning skills and expert domain knowledge. We set out to investigate whether\nclose- and open-source models (GPT-3.5, LLama-2, etc.) can be applied to answer\nand reason about difficult real-world-based questions. We focus on three\npopular medical benchmarks (MedQA-USMLE, MedMCQA, and PubMedQA) and multiple\nprompting scenarios: Chain-of-Thought (CoT, think step-by-step), few-shot and\nretrieval augmentation. 
Based on an expert annotation of the generated CoTs, we\nfound that InstructGPT can often read, reason and recall expert knowledge.\nLast, by leveraging advances in prompt engineering (few-shot and ensemble\nmethods), we demonstrated that GPT-3.5 not only yields calibrated predictive\ndistributions, but also reaches the passing score on three datasets:\nMedQA-USMLE 60.2%, MedMCQA 62.7% and PubMedQA 78.2%. Open-source models are\nclosing the gap: Llama-2 70B also passed the MedQA-USMLE with 62.5% accuracy.", + "title": "Can large language models reason about medical questions?", + "updated": "2023-12-24T11:17:23Z" + }, + "PMC": null, + "PMID": null, + "Published": "2022-07-17T11:24:44Z", + "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20OR%20abs%3A%22medical%22&id_list=&start=20&max_results=10", + "ReferenceCrawlerDeep": 0, + "References": null, + "SourceBank": 2, + "State": 2, + "Title": "Can large language models reason about medical questions?", + "Topics": null + }, + "8": { + "Abstract": "Large language models (LLMs) such as GPT-4 have recently demonstrated impressive results across a wide range of tasks. LLMs are still limited, however, in that they frequently fail at complex reasoning, their reasoning processes are opaque, they are prone to 'hallucinate' facts, and there are concerns about their underlying biases. Letting models verbalize reasoning steps as natural language, a technique known as chain-of-thought prompting, has recently been proposed as a way to address some of these issues. Here we present ThoughtSource, a meta-dataset and software library for chain-of-thought (CoT) reasoning. The goal of ThoughtSource is to improve future artificial intelligence systems by facilitating qualitative understanding of CoTs, enabling empirical evaluations, and providing training data. 
This first release of ThoughtSource integrates seven scientific/medical, three general-domain and five math word question answering datasets.", + "ArxivID": "2301.11596v5", + "Authors": [ + { + "Affiliations": null, + "ForeName": null, + "FullName": "Simon Ott", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Konstantin Hebenstreit", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Valentin Li\u00e9vin", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Christoffer Egeberg Hother", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Milad Moradi", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Maximilian Mayrhauser", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Robert Praas", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Ole Winther", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Matthias Samwald", + "HashID": null, + "LastName": null, + "ORCID": null + } + ], + "CiteCrawlerDeep": 0, + "CitedBy": null, + "DOI": null, + "FlagAffiliationMining": null, + "FlagExtractKG": null, + "FlagExtractTopic": null, + "InsertType": null, + "Journal": "Arxiv", + "Keywords": null, + "NamedEntities": null, + "OreginalArticle": { + "arxiv:comment": { + "#text": "Revision: added datasets, formatting", + "@xmlns:arxiv": "http://arxiv.org/schemas/atom" + }, + "arxiv:primary_category": { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.CL", + "@xmlns:arxiv": "http://arxiv.org/schemas/atom" + }, 
+ "author": [ + { + "name": "Simon Ott" + }, + { + "name": "Konstantin Hebenstreit" + }, + { + "name": "Valentin Li\u00e9vin" + }, + { + "name": "Christoffer Egeberg Hother" + }, + { + "name": "Milad Moradi" + }, + { + "name": "Maximilian Mayrhauser" + }, + { + "name": "Robert Praas" + }, + { + "name": "Ole Winther" + }, + { + "name": "Matthias Samwald" + } + ], + "category": [ + { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.CL" + }, + { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.AI" + } + ], + "id": "http://arxiv.org/abs/2301.11596v5", + "link": [ + { + "@href": "http://arxiv.org/abs/2301.11596v5", + "@rel": "alternate", + "@type": "text/html" + }, + { + "@href": "http://arxiv.org/pdf/2301.11596v5", + "@rel": "related", + "@title": "pdf", + "@type": "application/pdf" + } + ], + "published": "2023-01-27T08:45:53Z", + "summary": "Large language models (LLMs) such as GPT-4 have recently demonstrated\nimpressive results across a wide range of tasks. LLMs are still limited,\nhowever, in that they frequently fail at complex reasoning, their reasoning\nprocesses are opaque, they are prone to 'hallucinate' facts, and there are\nconcerns about their underlying biases. Letting models verbalize reasoning\nsteps as natural language, a technique known as chain-of-thought prompting, has\nrecently been proposed as a way to address some of these issues. Here we\npresent ThoughtSource, a meta-dataset and software library for chain-of-thought\n(CoT) reasoning. The goal of ThoughtSource is to improve future artificial\nintelligence systems by facilitating qualitative understanding of CoTs,\nenabling empirical evaluations, and providing training data. 
This first release\nof ThoughtSource integrates seven scientific/medical, three general-domain and\nfive math word question answering datasets.", + "title": "ThoughtSource: A central hub for large language model reasoning data", + "updated": "2023-07-27T09:37:35Z" + }, + "PMC": null, + "PMID": null, + "Published": "2023-01-27T08:45:53Z", + "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20OR%20abs%3A%22medical%22&id_list=&start=20&max_results=10", + "ReferenceCrawlerDeep": 0, + "References": null, + "SourceBank": 2, + "State": 2, + "Title": "ThoughtSource: A central hub for large language model reasoning data", + "Topics": null + }, + "9": { + "Abstract": "Large language models (LLMs) can capture rich representations of concepts that are useful for real-world tasks. However, language alone is limited. While existing LLMs excel at text-based inferences, health applications require that models be grounded in numerical data (e.g., vital signs, laboratory values in clinical domains; steps, movement in the wellness domain) that is not easily or readily expressed as text in existing training corpus. We demonstrate that with only few-shot tuning, a large language model is capable of grounding various physiological and behavioral time-series data and making meaningful inferences on numerous health tasks for both clinical and wellness contexts. 
Using data from wearable and medical sensor recordings, we evaluate these capabilities on the tasks of cardiac signal analysis, physical activity recognition, metabolic calculation (e.g., calories burned), and estimation of stress reports and mental health screeners.", + "ArxivID": "2305.15525v1", + "Authors": [ + { + "Affiliations": null, + "ForeName": null, + "FullName": "Xin Liu", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Daniel McDuff", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Geza Kovacs", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Isaac Galatzer-Levy", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Jacob Sunshine", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Jiening Zhan", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Ming-Zher Poh", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Shun Liao", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Paolo Di Achille", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Shwetak Patel", + "HashID": null, + "LastName": null, + "ORCID": null + } + ], + "CiteCrawlerDeep": 0, + "CitedBy": null, + "DOI": null, + "FlagAffiliationMining": null, + "FlagExtractKG": null, + "FlagExtractTopic": null, + "InsertType": null, + "Journal": "Arxiv", + "Keywords": null, + "NamedEntities": null, + "OreginalArticle": { + "arxiv:primary_category": { + "@scheme": 
"http://arxiv.org/schemas/atom", + "@term": "cs.CL", + "@xmlns:arxiv": "http://arxiv.org/schemas/atom" + }, + "author": [ + { + "name": "Xin Liu" + }, + { + "name": "Daniel McDuff" + }, + { + "name": "Geza Kovacs" + }, + { + "name": "Isaac Galatzer-Levy" + }, + { + "name": "Jacob Sunshine" + }, + { + "name": "Jiening Zhan" + }, + { + "name": "Ming-Zher Poh" + }, + { + "name": "Shun Liao" + }, + { + "name": "Paolo Di Achille" + }, + { + "name": "Shwetak Patel" + } + ], + "category": [ + { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.CL" + }, + { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.LG" + } + ], + "id": "http://arxiv.org/abs/2305.15525v1", + "link": [ + { + "@href": "http://arxiv.org/abs/2305.15525v1", + "@rel": "alternate", + "@type": "text/html" + }, + { + "@href": "http://arxiv.org/pdf/2305.15525v1", + "@rel": "related", + "@title": "pdf", + "@type": "application/pdf" + } + ], + "published": "2023-05-24T19:25:16Z", + "summary": "Large language models (LLMs) can capture rich representations of concepts\nthat are useful for real-world tasks. However, language alone is limited. While\nexisting LLMs excel at text-based inferences, health applications require that\nmodels be grounded in numerical data (e.g., vital signs, laboratory values in\nclinical domains; steps, movement in the wellness domain) that is not easily or\nreadily expressed as text in existing training corpus. We demonstrate that with\nonly few-shot tuning, a large language model is capable of grounding various\nphysiological and behavioral time-series data and making meaningful inferences\non numerous health tasks for both clinical and wellness contexts. 
Using data\nfrom wearable and medical sensor recordings, we evaluate these capabilities on\nthe tasks of cardiac signal analysis, physical activity recognition, metabolic\ncalculation (e.g., calories burned), and estimation of stress reports and\nmental health screeners.", + "title": "Large Language Models are Few-Shot Health Learners", + "updated": "2023-05-24T19:25:16Z" + }, + "PMC": null, + "PMID": null, + "Published": "2023-05-24T19:25:16Z", + "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20OR%20abs%3A%22medical%22&id_list=&start=20&max_results=10", + "ReferenceCrawlerDeep": 0, + "References": null, + "SourceBank": 2, + "State": 2, + "Title": "Large Language Models are Few-Shot Health Learners", + "Topics": null + }, + "10": { + "Abstract": "The current work investigates the capability of Large language models (LLMs) that are explicitly trained on large corpuses of medical knowledge (Med-PaLM 2) to predict psychiatric functioning from patient interviews and clinical descriptions without being trained to do so. To assess this, n = 145 depression and n =115 PTSD assessments and n = 46 clinical case studies across high prevalence/high comorbidity disorders (Depressive, Anxiety, Psychotic, trauma and stress, Addictive disorders) were analyzed using prompts to extract estimated clinical scores and diagnoses. Results demonstrate that Med-PaLM 2 is capable of assessing psychiatric functioning across a range of psychiatric conditions with the strongest performance being the prediction of depression scores based on standardized assessments (Accuracy range= 0.80 - 0.84) which were statistically indistinguishable from human clinical raters t(1,144) = 1.20; p = 0.23. 
Results show the potential for general clinical language models to flexibly predict psychiatric risk based on free descriptions of functioning from both patients and clinicians.", + "ArxivID": "2308.01834v1", + "Authors": [ + { + "Affiliations": null, + "ForeName": null, + "FullName": "Isaac R. Galatzer-Levy", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Daniel McDuff", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Vivek Natarajan", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Alan Karthikesalingam", + "HashID": null, + "LastName": null, + "ORCID": null + }, + { + "Affiliations": null, + "ForeName": null, + "FullName": "Matteo Malgaroli", + "HashID": null, + "LastName": null, + "ORCID": null + } + ], + "CiteCrawlerDeep": 0, + "CitedBy": null, + "DOI": null, + "FlagAffiliationMining": null, + "FlagExtractKG": null, + "FlagExtractTopic": null, + "InsertType": null, + "Journal": "Arxiv", + "Keywords": null, + "NamedEntities": null, + "OreginalArticle": { + "arxiv:primary_category": { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.CL", + "@xmlns:arxiv": "http://arxiv.org/schemas/atom" + }, + "author": [ + { + "name": "Isaac R. 
Galatzer-Levy" + }, + { + "name": "Daniel McDuff" + }, + { + "name": "Vivek Natarajan" + }, + { + "name": "Alan Karthikesalingam" + }, + { + "name": "Matteo Malgaroli" + } + ], + "category": [ + { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.CL" + }, + { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.AI" + }, + { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.LG" + } + ], + "id": "http://arxiv.org/abs/2308.01834v1", + "link": [ + { + "@href": "http://arxiv.org/abs/2308.01834v1", + "@rel": "alternate", + "@type": "text/html" + }, + { + "@href": "http://arxiv.org/pdf/2308.01834v1", + "@rel": "related", + "@title": "pdf", + "@type": "application/pdf" + } + ], + "published": "2023-08-03T15:52:27Z", + "summary": "The current work investigates the capability of Large language models (LLMs)\nthat are explicitly trained on large corpuses of medical knowledge (Med-PaLM 2)\nto predict psychiatric functioning from patient interviews and clinical\ndescriptions without being trained to do so. To assess this, n = 145 depression\nand n =115 PTSD assessments and n = 46 clinical case studies across high\nprevalence/high comorbidity disorders (Depressive, Anxiety, Psychotic, trauma\nand stress, Addictive disorders) were analyzed using prompts to extract\nestimated clinical scores and diagnoses. Results demonstrate that Med-PaLM 2 is\ncapable of assessing psychiatric functioning across a range of psychiatric\nconditions with the strongest performance being the prediction of depression\nscores based on standardized assessments (Accuracy range= 0.80 - 0.84) which\nwere statistically indistinguishable from human clinical raters t(1,144) =\n1.20; p = 0.23. 
Results show the potential for general clinical language models\nto flexibly predict psychiatric risk based on free descriptions of functioning\nfrom both patients and clinicians.", + "title": "The Capability of Large Language Models to Measure Psychiatric\n Functioning", + "updated": "2023-08-03T15:52:27Z" + }, + "PMC": null, + "PMID": null, + "Published": "2023-08-03T15:52:27Z", + "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20OR%20abs%3A%22medical%22&id_list=&start=20&max_results=10", + "ReferenceCrawlerDeep": 0, + "References": null, + "SourceBank": 2, + "State": 2, + "Title": "The Capability of Large Language Models to Measure Psychiatric Functioning", + "Topics": null + } + } +} \ No newline at end of file diff --git a/docs/client-api.md b/docs/client-api.md index 339a201..97d7b0d 100644 --- a/docs/client-api.md +++ b/docs/client-api.md @@ -60,3 +60,170 @@ https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=pubmed&db=pubmed https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=pubmed&db=pubmed&id=35130239&retmode=json pubmed_pubmed_citedin + + +# arXiv +arXiv is a free distribution service and an open-access archive for nearly 2.4 million scholarly articles in the fields of physics, mathematics, computer science, quantitative biology, quantitative finance, statistics, electrical engineering and systems science, and economics. Materials on this site are not peer-reviewed by arXiv. + +https://arxiv.org/ + +[arXiv Dataset](https://www.kaggle.com/datasets/Cornell-University/arxiv) +arXiv dataset and metadata of 1.7M+ scholarly papers across STEM + +## arXiv API Access + +arXiv offers public API access in order to maximize its openness and interoperability. Many projects utilize this option without becoming official [arXivLabs collaborations](https://labs.arxiv.org/). 
+ +## arXivLabs: Showcase +arXiv is surrounded by a community of researchers and developers working at the cutting edge of information science and technology. + +https://info.arxiv.org/labs/showcase.html + + +## arXiv API User's Manual +https://info.arxiv.org/help/api/user-manual.html + +Please review the [Terms of Use for arXiv APIs](https://info.arxiv.org/help/api/tou.html) before using the arXiv API. + + +you can search for articles that contain electron AND proton with the API by entering + +http://export.arxiv.org/api/query?search_query=all:electron+AND+all:proton + +The parameters for each of the API methods are explained below. For each method, the base url is +``` +http://export.arxiv.org/api/{method_name}?{parameters} +``` + +| | | | | | +| --- | --- | --- | --- | --- | +| query | | | | | +| | **parameters** | **type** | **defaults** | **required** | +| | `search_query` | string | None | No | +| | `id_list` | comma-delimited string | None | No | +| | `start` | int | 0 | No | +| | `max_results` | int | 10 | No | + +### Details of Query Construction + +| | | +| --- | --- | +| **prefix** | **explanation** | +| ti | Title | +| au | Author | +| abs | Abstract | +| co | Comment | +| jr | Journal Reference | +| cat | Subject Category | +| rn | Report Number | +| id | Id (use `id_list` instead) | +| all | All of the above | + +### start and max_results paging +``` +http://export.arxiv.org/api/query?search_query=all:electron&start=0&max_results=10 (1) +http://export.arxiv.org/api/query?search_query=all:electron&start=10&max_results=10 (2) +http://export.arxiv.org/api/query?search_query=all:electron&start=20&max_results=10 (3) +``` + + Get results 0-9 + + Get results 10-19 + + Get results 20-29 + +A request with `max_results >30,000` will result in an `HTTP 400 error code` with appropriate explanation. A request for 30000 results will typically take a little over 2 minutes to return a response of over 15MB. 
Requests for fewer results are much faster and correspondingly smaller. + +### sort order for return results + +There are two options for for the result set to the API search, sortBy and sortOrder. + +sortBy can be "relevance", "lastUpdatedDate", "submittedDate" + +sortOrder can be either "ascending" or "descending" + +A sample query using these new parameters looks like: +``` +http://export.arxiv.org/api/query?search_query=ti:"electron thermal conductivity"&sortBy=lastUpdatedDate&sortOrder=ascending +``` +### The API Response + +```xml + + + + ArXiv Query: search_query=all:electron&id_list=&start=0&max_results=1 + http://arxiv.org/api/cHxbiOdZaP56ODnBPIenZhzg5f8 + 2007-10-08T00:00:00-04:00 + 1000 + 0 + 1 + + http://arxiv.org/abs/hep-ex/0307015 + 2003-07-07T13:46:39-04:00 + 2003-07-07T13:46:39-04:00 + Multi-Electron Production at High Transverse Momenta in ep Collisions at + HERA + Multi-electron production is studied at high electron transverse momentum in + positron- and electron-proton collisions using the H1 detector at HERA. The + data correspond to an integrated luminosity of 115 pb-1. Di-electron and + tri-electron event yields are measured. Cross sections are derived in a + restricted phase space region dominated by photon-photon collisions. In general + good agreement is found with the Standard Model predictions. However, for + electron pair invariant masses above 100 GeV, three di-electron events and + three tri-electron events are observed, compared to Standard Model expectations + of 0.30 \pm 0.04 and 0.23 \pm 0.04, respectively. + + + H1 Collaboration + + 23 pages, 8 figures and 4 tables + Eur.Phys.J. C31 (2003) 17-29 + + + + + + + +``` + + +The `` element is used to describe either an arXiv, ACM, or MSC classification. See the [arXiv metadata explanation]() for more details about these classifications. 
+ +``` +http://export.arxiv.org/api/query?search_query=au:del_maestro+AND+ti:%22quantum+criticality%22 +``` +This query returns one result, and notice that the feed `` contains double quotes as expected. The table below lists the two grouping operators used in the API. + +| | | | +| --- | --- | --- | +| **symbol** | **encoding** | **explanation** | +| ( ) | %28 %29 | Used to group Boolean expressions for Boolean operator precedence. | +| double quotes | %22 %22 | Used to group multiple words into phrases to search a particular field. | +| space | + | Used to extend a `search_query` to include multiple fields. | + +### arXiv identifier scheme - information for interacting services +https://info.arxiv.org/help/arxiv_identifier_for_services.html + + +The table below shows the correspondence between old and new identifier forms, internal and external identifiers, and semantics that can and cannot be derived from the identifier: + +| | Internal identifier | Preferred external <br>identifier | Year | Month | Version | Original primary <br>classification | Primary classification | Secondary classification | +| --- | --- | --- | --- | --- | --- | --- | --- | --- | +| Old scheme | hep-th/9901001 <br>hep-th/9901001v1 <br>math.CA/0611800v2 | arXiv:hep-th/9901001 <br>arXiv:hep-th/9901001v1 <br>arXiv:math/0611800v2 | 1999 <br>1999 <br>2006 | 1 (Jan) <br>1 (Jan) <br>11 (Nov) | latest <br>v1 <br>v2 | hep-th <br>hep-th <br>math.CA | (in metadata) | (in metadata) | +| New scheme | 0704.0001 <br>0704.0001v1 <br>1412.7878 <br>1501.00001 <br>9912.12345v2 | arXiv:0704.0001 <br>arXiv:0704.0001v1 <br>arXiv:1412.7878 <br>arXiv:1501.00001 <br>arXiv:9912.12345v2 | 2007 <br>2007 <br>2014 <br>2015 <br>2099 | 6 (Jun) <br>6 (Jun) <br>12 (Dec) <br>1 (Jan) <br>12 (Dec) | latest <br>v1 <br>latest <br>latest <br>v2 | (in announcement log) | (in metadata) | (in metadata) | + +### URLs for standard arXiv functions +The URL patterns for all standard arXiv functions are consistent for the different 
forms of the arXiv identifier. Some examples are given in the table below: + +| | Generic | Example with old id (9107-0703) | Example with new id (0704-1412) | Example new id (1501-) | +| --- | --- | --- | --- | --- | +| Abstract (normal HTML) | `/abs/id` | `/abs/hep-th/9901001` | `/abs/0706.0001` | `/abs/1501.00001` | +| Abstract (raw txt) | `/abs/id?fmt=txt` | `/abs/hep-th/9901001?fmt=txt` | `/abs/0706.0001?fmt=txt` | `/abs/1501.00001?fmt=txt` | +| PDF | `/pdf/id.pdf` | `/pdf/hep-th/9901001.pdf` | `/pdf/0706.0001.pdf` | `/pdf/1501.00001.pdf` | +| PS | `/ps/id` | `/ps/hep-th/9901001` | `/ps/0706.0001` | `/ps/1501.00001` | +| Source (.gz,.tar.gz,.pdf...) | `/src/id` | `/src/hep-th/9901001` | `/src/0706.0001` | `/src/1501.00001` | +| Trackbacks | `/tb/id` | `/tb/hep-th/9901001` | `/tb/0706.0001` | `/tb/1501.00001` | +| New listings | `/list/arch-ive/new` | `/list/hep-th/new` | `/list/hep-th/new` | `/list/hep-th/new` | +| Month listings | `/list/arch-ive/yymm` | `/list/hep-th/0601` | `/list/hep-th/0601` | `/list/hep-th/0601` | \ No newline at end of file diff --git a/jupyter_lab/database/Arxiv_test.json b/jupyter_lab/database/Arxiv_test.json new file mode 100644 index 0000000..c65a72c --- /dev/null +++ b/jupyter_lab/database/Arxiv_test.json @@ -0,0 +1 @@ +{"_default": {"1": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": 
"q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "2": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "3": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "4": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "5": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "6": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "7": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "8": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "9": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "10": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "11": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "12": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "13": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "14": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "15": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "16": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "17": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "18": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "19": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "20": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "21": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "22": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "23": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "24": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "25": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "26": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "27": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "28": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "29": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "30": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "31": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "32": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "33": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "34": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "35": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "36": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "37": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "38": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "39": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "40": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "41": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "42": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "43": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "44": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "45": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "46": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "47": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "48": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "49": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "50": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "51": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "52": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "53": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "54": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "55": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "56": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "57": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "58": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "59": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "60": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "61": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "62": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "63": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "64": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "65": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "66": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "67": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "68": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "69": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "70": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "71": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "72": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "73": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "74": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "75": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "76": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "77": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "78": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "79": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "80": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "81": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "82": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "83": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "84": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "85": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "86": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "87": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "88": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "89": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "90": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "91": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "92": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "93": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "94": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "95": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "96": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "97": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "98": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "99": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "100": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "101": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "102": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "103": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "104": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "105": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "106": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "107": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "108": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "109": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "110": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "111": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "112": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "113": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": 
"http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "114": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. 
Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "115": {"Abstract": null, "ArxivID": null, "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"PubmedArticleSet": {"PubmedArticle": {"MedlineCitation": {"@IndexingMethod": "Automated", "@Owner": "NLM", "@Status": "Publisher", "Article": {"@PubModel": "Print-Electronic", "Abstract": {"AbstractText": [{"#text": "To analyze the quality and readability of information regarding shoulder stabilization surgery available using an online AI software (ChatGPT), using standardized scoring systems, as well as to report on the given answers by the AI.", 
"@Label": "PURPOSE", "@NlmCategory": "OBJECTIVE"}, {"#text": "An open AI model (ChatGPT) was used to answer 23 commonly asked questions from patients on shoulder stabilization surgery. These answers were evaluated for medical accuracy, quality, and readability using The JAMA Benchmark criteria, DISCERN score, Flesch-Kincaid Reading Ease Score (FRES) & Grade Level (FKGL).", "@Label": "METHODS", "@NlmCategory": "METHODS"}, {"#text": "The JAMA Benchmark criteria score was 0, which is the lowest score, indicating no reliable resources cited. The DISCERN score was 60, which is considered a good score. The areas that open AI model did not achieve full marks were also related to the lack of available source material used to compile the answers, and finally some shortcomings with information not fully supported by the literature. The FRES was 26.2, and the FKGL was considered to be that of a college graduate.", "@Label": "RESULTS", "@NlmCategory": "RESULTS"}, {"#text": "There was generally high quality in the answers given on questions relating to shoulder stabilization surgery, but there was a high reading level required to comprehend the information presented. However, it is unclear where the answers came from with no source material cited. It is important to note that the ChatGPT software repeatedly references the need to discuss these questions with an orthopaedic surgeon and the importance of shared discussion making, as well as compliance with surgeon treatment recommendations.", "@Label": "CONCLUSION", "@NlmCategory": "CONCLUSIONS"}, {"#text": "As shoulder instability is an injury that predominantly affects younger individuals who may use the Internet for information, this study shows what information patients may be getting online.", "@Label": "CLINICAL RELEVANCE", "@NlmCategory": "CONCLUSIONS"}], "CopyrightInformation": "Copyright \u00a9 2023 Arthroscopy Association of North America. Published by Elsevier Inc. 
All rights reserved."}, "ArticleDate": {"@DateType": "Electronic", "Day": "09", "Month": "08", "Year": "2023"}, "ArticleTitle": "Evaluation High-Quality of Information from ChatGPT (Artificial Intelligence-Large Language Model) Artificial Intelligence on Shoulder Stabilization Surgery.", "AuthorList": {"@CompleteYN": "Y", "Author": [{"@ValidYN": "Y", "AffiliationInfo": {"Affiliation": "Duke University, Durham, North Carolina, U.S.A.. Electronic address: eoghan.hurley@duke.edu."}, "ForeName": "Eoghan T", "Initials": "ET", "LastName": "Hurley"}, {"@ValidYN": "Y", "AffiliationInfo": {"Affiliation": "Duke University, Durham, North Carolina, U.S.A."}, "ForeName": "Bryan S", "Initials": "BS", "LastName": "Crook"}, {"@ValidYN": "Y", "AffiliationInfo": {"Affiliation": "Duke University, Durham, North Carolina, U.S.A."}, "ForeName": "Samuel G", "Initials": "SG", "LastName": "Lorentz"}, {"@ValidYN": "Y", "AffiliationInfo": {"Affiliation": "Duke University, Durham, North Carolina, U.S.A."}, "ForeName": "Richard M", "Initials": "RM", "LastName": "Danilkowicz"}, {"@ValidYN": "Y", "AffiliationInfo": {"Affiliation": "Duke University, Durham, North Carolina, U.S.A."}, "ForeName": "Brian C", "Initials": "BC", "LastName": "Lau"}, {"@ValidYN": "Y", "AffiliationInfo": {"Affiliation": "Duke University, Durham, North Carolina, U.S.A."}, "ForeName": "Dean C", "Initials": "DC", "LastName": "Taylor"}, {"@ValidYN": "Y", "AffiliationInfo": {"Affiliation": "Duke University, Durham, North Carolina, U.S.A."}, "ForeName": "Jonathan F", "Initials": "JF", "LastName": "Dickens"}, {"@ValidYN": "Y", "AffiliationInfo": {"Affiliation": "Duke University, Durham, North Carolina, U.S.A."}, "ForeName": "Oke", "Initials": "O", "LastName": "Anakwenze"}, {"@ValidYN": "Y", "AffiliationInfo": {"Affiliation": "Duke University, Durham, North Carolina, U.S.A."}, "ForeName": "Christopher S", "Initials": "CS", "LastName": "Klifto"}]}, "ELocationID": [{"#text": "S0749-8063(23)00642-4", "@EIdType": "pii", "@ValidYN": 
"Y"}, {"#text": "10.1016/j.arthro.2023.07.048", "@EIdType": "doi", "@ValidYN": "Y"}], "Journal": {"ISOAbbreviation": "Arthroscopy", "ISSN": {"#text": "1526-3231", "@IssnType": "Electronic"}, "JournalIssue": {"@CitedMedium": "Internet", "PubDate": {"Day": "09", "Month": "Aug", "Year": "2023"}}, "Title": "Arthroscopy : the journal of arthroscopic & related surgery : official publication of the Arthroscopy Association of North America and the International Arthroscopy Association"}, "Language": "eng", "PublicationTypeList": {"PublicationType": {"#text": "Journal Article", "@UI": "D016428"}}}, "CitationSubset": "IM", "DateRevised": {"Day": "23", "Month": "08", "Year": "2023"}, "MedlineJournalInfo": {"Country": "United States", "ISSNLinking": "0749-8063", "MedlineTA": "Arthroscopy", "NlmUniqueID": "8506498"}, "PMID": {"#text": "37567487", "@Version": "1"}}, "PubmedData": {"ArticleIdList": {"ArticleId": [{"#text": "37567487", "@IdType": "pubmed"}, {"#text": "10.1016/j.arthro.2023.07.048", "@IdType": "doi"}, {"#text": "S0749-8063(23)00642-4", "@IdType": "pii"}]}, "History": {"PubMedPubDate": [{"@PubStatus": "received", "Day": "29", "Month": "3", "Year": "2023"}, {"@PubStatus": "revised", "Day": "27", "Month": "6", "Year": "2023"}, {"@PubStatus": "accepted", "Day": "28", "Month": "7", "Year": "2023"}, {"@PubStatus": "pubmed", "Day": "12", "Hour": "10", "Minute": "42", "Month": "8", "Year": "2023"}, {"@PubStatus": "medline", "Day": "12", "Hour": "10", "Minute": "42", "Month": "8", "Year": "2023"}, {"@PubStatus": "entrez", "Day": "11", "Hour": "19", "Minute": "27", "Month": "8", "Year": "2023"}]}, "PublicationStatus": "aheadofprint"}}}}, "PMC": null, "PMID": "37567487", "Published": null, "QueryTranslation": "\"large language model\"[Title] AND \"Benchmark\"[Title/Abstract]", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 1, "State": -1, "Title": null, "Topics": null}}} \ No newline at end of file diff --git a/jupyter_lab/pipeline.ipynb 
b/jupyter_lab/pipeline.ipynb new file mode 100644 index 0000000..b7fbaeb --- /dev/null +++ b/jupyter_lab/pipeline.ipynb @@ -0,0 +1,164 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pipleline Sample\n", + "This sample show the core functionality" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import urllib.parse\n", + "from triplea.service.repository.state.initial_arxiv import get_article_list_from_arxiv_all_store_to_arepo\n", + "\n", + "\n", + "arxiv_search_string = 'ti:\"large+language+model\"+AND+ti:Benchmark'\n", + "arxiv_search_string = 'ti:\"large language model\" AND ti:Benchmark'\n", + "arxiv_search_string= urllib.parse.quote(arxiv_search_string)\n", + "print(arxiv_search_string)\n", + "get_article_list_from_arxiv_all_store_to_arepo(arxiv_search_string,0,10)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32mTotal number of article is 1\u001b[0m\n", + "\u001b[32m Round (1) : Get another 1 record (Total 1 record)\u001b[0m\n", + "\u001b[32madd 37567487 to knowledge repository. 
(1)\u001b[0m\n", + "\u001b[32m Round (2):\n", + " Get another -1 record (total 1 record)\u001b[0m\n" + ] + } + ], + "source": [ + "from triplea.service.repository.state.initial import get_article_list_from_pubmed_all_store_to_arepo\n", + "\n", + "\n", + "pubmed_search_string = '(\"large language model\"[Title]) AND (Benchmark[Title/Abstract])'\n", + "get_article_list_from_pubmed_all_store_to_arepo(pubmed_search_string)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get info of Article Repository\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32mNumber of article in article repository is 115\u001b[0m\n", + "[{'State': -2, 'n': 0}, {'State': -1, 'n': 115}, {'State': 0, 'n': 0}, {'State': 1, 'n': 0}, {'State': 2, 'n': 0}, {'State': 3, 'n': 0}, {'State': 4, 'n': 0}]\n", + "\u001b[32m115 article(s) in state -1.\u001b[0m\n" + ] + } + ], + "source": [ + "from triplea.service.click_logger import logger\n", + "from triplea.service.repository import persist\n", + "\n", + "\n", + "logger.INFO(\n", + " \"Number of article in article repository is \"\n", + " + str(persist.get_all_article_count())\n", + ")\n", + "\n", + "data = persist.get_article_group_by_state()\n", + "for i in range(-3, 7):\n", + " for s in data:\n", + " if s[\"State\"] == i:\n", + " w = 1\n", + " n = s[\"n\"]\n", + " if n != 0:\n", + " logger.INFO(f\"{n} article(s) in state {i}.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Moving Forward\n", + "We move from state `0` to state `3`\n", + "The best approach is to finalize state all the article in the `core state`.\n", + "\n", + "### Moving from `0` to `1`" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m1 Article(s) is in state 0\u001b[0m\n", + "Article 
37567487 with state 0 forward to 1\n" + ] + } + ], + "source": [ + "from triplea.service.repository.pipeline_core import move_state_forward\n", + "\n", + "move_state_forward(0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Moving from `1` to `2`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "move_state_forward(1)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/jupyter_lab/sample.md b/jupyter_lab/sample.md new file mode 100644 index 0000000..e69de29 diff --git a/jupyter_lab/selection-sampling.ipynb b/jupyter_lab/selection-sampling.ipynb index de3ee45..840119b 100644 --- a/jupyter_lab/selection-sampling.ipynb +++ b/jupyter_lab/selection-sampling.ipynb @@ -542,7 +542,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.3" + "version": "3.10.5" }, "orig_nbformat": 4 }, diff --git a/temp-arxiv.json b/temp-arxiv.json new file mode 100644 index 0000000..91067ac --- /dev/null +++ b/temp-arxiv.json @@ -0,0 +1,139 @@ +{ + "feed": { + "@xmlns": "http://www.w3.org/2005/Atom", + "entry": [ + { + "arxiv:primary_category": { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "stat.ME", + "@xmlns:arxiv": "http://arxiv.org/schemas/atom" + }, + "author": { + "name": "Gerhard Tutz" + }, + "category": { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "stat.ME" + }, + "id": "http://arxiv.org/abs/1906.03851v1", + "link": [ + { + "@href": "http://arxiv.org/abs/1906.03851v1", + "@rel": "alternate", + "@type": "text/html" + }, + { + "@href": 
"http://arxiv.org/pdf/1906.03851v1", + "@rel": "related", + "@title": "pdf", + "@type": "application/pdf" + } + ], + "published": "2019-06-10T09:09:13Z", + "summary": "Ordered item response models that are in common use can be divided into three\ngroups, cumulative, sequential and adjacent categories model. The derivation\nand motivation of the models is typically based on the assumed presence of\nlatent traits or underlying process models. In the construction frequently\nbinary models play an important role. The objective of this paper is to give\nmotivations for the models and to clarify the role of the binary models for the\nvarious types of ordinal models. It is investigated which binary models are\nincluded in an ordinal model but also how the models can be constructed from a\nsequence of binary models. In all the models one finds a Guttman space\nstructure, which has previously been investigated in particular for the partial\ncredit model. The consideration of the binary models adds to the interpretation\nof model parameters, which is helpful, in particular, in the case of the\npartial credit model, for which interpretation is less straightforward than for\nthe other models. A specific topic that is addressed is the ordering of\nthresholds in the partial credit model because for some researchers reversed\nordering is an anomaly, others disagree. 
It is argued that the ordering of\nthresholds is not a constitutive element of the partial credit model.", + "title": "On the Structure of Ordered Latent Trait Models", + "updated": "2019-06-10T09:09:13Z" + }, + { + "arxiv:primary_category": { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "stat.CO", + "@xmlns:arxiv": "http://arxiv.org/schemas/atom" + }, + "author": [ + { + "name": "Niamh Russell" + }, + { + "name": "Thomas Brendan Murphy" + }, + { + "name": "Adrian E Raftery" + } + ], + "category": { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "stat.CO" + }, + "id": "http://arxiv.org/abs/1506.09035v1", + "link": [ + { + "@href": "http://arxiv.org/abs/1506.09035v1", + "@rel": "alternate", + "@type": "text/html" + }, + { + "@href": "http://arxiv.org/pdf/1506.09035v1", + "@rel": "related", + "@title": "pdf", + "@type": "application/pdf" + } + ], + "published": "2015-06-30T11:17:40Z", + "summary": "We propose Bayesian model averaging (BMA) as a method for postprocessing the\nresults of model-based clustering. Given a number of competing models,\nappropriate model summaries are averaged, using the posterior model\nprobabilities, instead of being taken from a single \"best\" model. We\ndemonstrate the use of BMA in model-based clustering for a number of datasets.\nWe show that BMA provides a useful summary of the clustering of observations\nwhile taking model uncertainty into account. Further, we show that BMA in\nconjunction with model-based clustering gives a competitive method for density\nestimation in a multivariate setting. 
Applying BMA in the model-based context\nis fast and can give enhanced modeling performance.", + "title": "Bayesian model averaging in model-based clustering and density\n estimation", + "updated": "2015-06-30T11:17:40Z" + }, + { + "arxiv:primary_category": { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.AI", + "@xmlns:arxiv": "http://arxiv.org/schemas/atom" + }, + "author": [ + { + "name": "Volker Tresp" + }, + { + "name": "Maximilian Nickel" + } + ], + "category": { + "@scheme": "http://arxiv.org/schemas/atom", + "@term": "cs.AI" + }, + "id": "http://arxiv.org/abs/1609.03145v1", + "link": [ + { + "@href": "http://arxiv.org/abs/1609.03145v1", + "@rel": "alternate", + "@type": "text/html" + }, + { + "@href": "http://arxiv.org/pdf/1609.03145v1", + "@rel": "related", + "@title": "pdf", + "@type": "application/pdf" + } + ], + "published": "2016-09-11T10:14:18Z", + "summary": "We provide a survey on relational models. Relational models describe complete\nnetworked {domains by taking into account global dependencies in the data}.\nRelational models can lead to more accurate predictions if compared to\nnon-relational machine learning approaches. Relational models typically are\nbased on probabilistic graphical models, e.g., Bayesian networks, Markov\nnetworks, or latent variable models. 
Relational models have applications in\nsocial networks analysis, the modeling of knowledge graphs, bioinformatics,\nrecommendation systems, natural language processing, medical decision support,\nand linked data.", + "title": "Relational Models", + "updated": "2016-09-11T10:14:18Z" + } + ], + "id": "http://arxiv.org/api/3j7MVc7VO3Ua/raWFnIzp9O91s0", + "link": { + "@href": "http://arxiv.org/api/query?search_query%3Dall%3Amodel%26id_list%3D%26start%3D1%26max_results%3D3", + "@rel": "self", + "@type": "application/atom+xml" + }, + "opensearch:itemsPerPage": { + "#text": "3", + "@xmlns:opensearch": "http://a9.com/-/spec/opensearch/1.1/" + }, + "opensearch:startIndex": { + "#text": "1", + "@xmlns:opensearch": "http://a9.com/-/spec/opensearch/1.1/" + }, + "opensearch:totalResults": { + "#text": "827057", + "@xmlns:opensearch": "http://a9.com/-/spec/opensearch/1.1/" + }, + "title": { + "#text": "ArXiv Query: search_query=all:model&id_list=&start=1&max_results=3", + "@type": "html" + }, + "updated": "2023-12-28T00:00:00-05:00" + } +} \ No newline at end of file diff --git a/triplea/client/arxiv/__init__.py b/triplea/client/arxiv/__init__.py new file mode 100644 index 0000000..6893706 --- /dev/null +++ b/triplea/client/arxiv/__init__.py @@ -0,0 +1,42 @@ +from triplea.config.settings import SETTINGS +import requests +import xmltodict +import json +from triplea.service.click_logger import logger + + +def get_article_list_from_arxiv(search_query: str, start: int, max_results: int) -> dict: + URL = "http://export.arxiv.org/api/query?" 
+ + # defining a params dict for the parameters to be sent to the API + PARAMS = { + "search_query": search_query, + "start": start, + "max_results": max_results, + } + + headers = {"User-Agent": SETTINGS.AAA_CLIENT_AGENT} + + # To use HTTP Basic Auth with your proxy, use the http://user:password@host.com/ syntax: + if SETTINGS.AAA_PROXY_HTTP is not None: + proxy_servers = { + "http": SETTINGS.AAA_PROXY_HTTP, + "https": SETTINGS.AAA_PROXY_HTTPS, + } + else: + proxy_servers = None + + # sending get request and saving the response as response object + try: + r = requests.get(url=URL, params=PARAMS, headers=headers, proxies=proxy_servers) + except Exception: + raise Exception("Connection Error.") + + # Convert XML to Json + if r.status_code == 200: + xml = r.content + data_dict = xmltodict.parse(xml) + return data_dict + else: + raise Exception(f"Error HTTP : {r.status_code}") + \ No newline at end of file diff --git a/triplea/config/environment_variable/.env.sample b/triplea/config/environment_variable/.env.sample index 8936f2f..0372369 100644 --- a/triplea/config/environment_variable/.env.sample +++ b/triplea/config/environment_variable/.env.sample @@ -9,5 +9,6 @@ AAA_PROXY_HTTP = AAA_PROXY_HTTPS = AAA_REFF_CRAWLER_DEEP = 1 AAA_CITED_CRAWLER_DEEP = 1 -AAA_TOPIC_EXTRACT_ENDPOINT=http://localhost:8001/api/v1/topic/ -AAA_CLIENT_AGENT="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/109.0" \ No newline at end of file +AAA_TOPIC_EXTRACT_ENDPOINT =http://localhost:8001/api/v1/topic/ +AAA_CLIENT_AGENT ="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/109.0" +AAA_CLI_ALERT_POINT = 500 \ No newline at end of file diff --git a/triplea/config/settings.py b/triplea/config/settings.py index 4108ec2..7afd887 100644 --- a/triplea/config/settings.py +++ b/triplea/config/settings.py @@ -51,9 +51,11 @@ class Settings(BaseSettings): ) VERSION: Optional[str] = ( - version + ".002" + version + ".001" ) # Change this micro version in 
the development process + AAA_CLI_ALERT_POINT: Optional[int] = os.getenv("AAA_CLI_ALERT_POINT", 500) + # class Config: # case_sensitive = True # # env_file = ROOT / 'config' / 'enviroment_variable' / '.env' diff --git a/triplea/db/mongodb.py b/triplea/db/mongodb.py index 359c817..b8a9dc6 100644 --- a/triplea/db/mongodb.py +++ b/triplea/db/mongodb.py @@ -48,6 +48,21 @@ def get_article_pmid_list_by_state(self, state: int): return [] else: return new_la + + def get_article_id_list_by_state(self, state: int): + myquery = {"State": state} + cursor = self.col_article.find(myquery, projection={"SourceBank": "$SourceBank", "_id": 1}) + # TODO _id + + la = list(cursor) + new_la = [] + for c in la: + new_la.append(c["_id"]) + + if len(new_la) == 0: + return [] + else: + return new_la def get_article_pmid_list_by_cstate(self, state: int, tag_field: str): if state is None or state == 0: @@ -101,6 +116,16 @@ def get_article_by_pmid(self, pmid: str): # la.append(d) # return la + def get_article_by_id(self, id: str): + myquery = {"_id": id} + cursor = self.col_article.find(myquery) + + if len(list(cursor.clone())) == 0: + return None + else: + la = list(cursor) + return la[0] + def update_article_by_pmid(self, article: Article, pmid: str): article_json = json.loads( json.dumps(article, default=lambda o: o.__dict__, sort_keys=True, indent=4) @@ -109,6 +134,17 @@ def update_article_by_pmid(self, article: Article, pmid: str): r = self.col_article.replace_one(myquery, article_json) return r.raw_result + def update_article_by_id(self, article: Article, id: str): + article_json = json.loads( + json.dumps(article, + default=lambda o: o.__dict__, + sort_keys=True, + indent=4) + ) + myquery = {"_id": id} + r = self.col_article.replace_one(myquery, article_json) + return r.raw_result + def is_article_exist_by_pmid(self, pmid: str) -> bool: """ > Check if the article with the given PMID exists in the database @@ -122,6 +158,14 @@ def is_article_exist_by_pmid(self, pmid: str) -> bool: return 
True else: return False + + def is_article_exist_by_arxiv_id(self,id:str)->bool: + myquery = {"ArxivID": id} + if self.col_article.count_documents(myquery) > 0: + return True + else: + return False + def get_all_article_count(self) -> int: """ diff --git a/triplea/db/tinydb.py b/triplea/db/tinydb.py index 9a1abde..f1308b0 100644 --- a/triplea/db/tinydb.py +++ b/triplea/db/tinydb.py @@ -27,7 +27,10 @@ class DB_TinyDB(DataBase): def add_new_article(self, article: Article) -> int: article_json = json.loads( - json.dumps(article, default=lambda o: o.__dict__, sort_keys=True, indent=4) + json.dumps(article, + default=lambda o: o.__dict__, + sort_keys=True, + indent=4) ) # article_json = json.dumps(article.json()) return self.db.insert(article_json) @@ -41,6 +44,11 @@ def get_article_pmid_list_by_state(self, state: int): l_pmid = [a.get("PMID") for a in self.db.search(q.State == state)] return l_pmid + def get_article_id_list_by_state(self, state: int): + q = Query() + l_pmid = [a.get("ArxivID") for a in self.db.search(q.State == state)] + return l_pmid + def get_article_pmid_list_by_cstate(self, state: int, tag_field: str): q = Query() if state is None or state == 0: @@ -72,6 +80,10 @@ def get_count_article_by_state(self, state: int): def get_article_by_pmid(self, pmid: str): q = Query() return self.db.get(q.PMID == pmid) + + def get_article_by_id(self, id: str): + q = Query() + return self.db.get(q.id == id) def update_article_by_pmid(self, article: Article, pmid: str): article_json = json.loads( @@ -80,6 +92,17 @@ def update_article_by_pmid(self, article: Article, pmid: str): q = Query() return self.db.update(article_json, q.PMID == pmid) + def update_article_by_id(self, article: Article, id: str): + article_json = json.loads( + json.dumps(article, + default=lambda o: o.__dict__, + sort_keys=True, + indent=4) + ) + q = Query() + return self.db.update(article_json, q.ID == id) + + def is_article_exist_by_pmid(self, pmid: str) -> bool: """ > Check if the article with 
the given PMID exists in the database @@ -90,6 +113,10 @@ def is_article_exist_by_pmid(self, pmid: str) -> bool: """ q = Query() return self.db.contains(q.PMID == pmid) + + def is_article_exist_by_arxiv_id(self,id:str)->bool: + q = Query() + return self.db.contains(q.ArxivID == id) def get_all_article_count(self) -> int: """ @@ -130,7 +157,10 @@ def get_all_nodes(self): def add_new_edge(self, edge: Edge) -> int: edge_json = json.loads( - json.dumps(edge, default=lambda o: o.__dict__, sort_keys=True, indent=4) + json.dumps(edge, + default=lambda o: o.__dict__, + sort_keys=True, + indent=4) ) table = self.db.table("edge") return table.insert(edge_json) diff --git a/triplea/schemas/article.py b/triplea/schemas/article.py index 2f01e3f..7c987d6 100644 --- a/triplea/schemas/article.py +++ b/triplea/schemas/article.py @@ -1,7 +1,7 @@ from pydantic import BaseModel, Field from typing import Optional import enum - +from datetime import datetime class NamedEntity(BaseModel): Label: Optional[str] = Field(description="") @@ -23,6 +23,11 @@ class AffiliationParseMethod(enum.IntEnum): TITIPATA_API = 2 # https://github.com/titipata/affiliation_parser + +class SourceBankType(enum.IntEnum): + PUBMED = 1 + ARXIV = 2 + class Affiliation(BaseModel): HashID: Optional[str] Text: Optional[str] = Field(description="") @@ -53,6 +58,7 @@ class Author(BaseModel): class Article(BaseModel): + SourceBank: Optional[SourceBankType] = Field(description="") PMID: Optional[str] = Field( description="""the PubMed (NLM database that incorporates MEDLINE) unique identifier, is a 1 to 8-digit accession number @@ -91,3 +97,6 @@ class Article(BaseModel): FlagExtractKG: Optional[int] = Field(description="") FlagAffiliationMining: Optional[int] = Field(description="") FlagExtractTopic: Optional[int] = Field(description="") + + Published: Optional[datetime] = Field(description="") + ArxivID: Optional[str] = Field(description="") diff --git a/triplea/service/repository/persist.py 
b/triplea/service/repository/persist.py index 91feffb..03cc276 100644 --- a/triplea/service/repository/persist.py +++ b/triplea/service/repository/persist.py @@ -37,6 +37,10 @@ def get_article_pmid_list_by_state(state: int): return db.get_article_pmid_list_by_state(state) +def get_article_id_list_by_state(state: int): + return db.get_article_id_list_by_state(state) + + def get_article_pmid_list_by_cstate(state: int, tag_field: str): """ This function returns a list of PubMed IDs (PMIDs) of articles @@ -78,6 +82,9 @@ def get_article_by_pmid(pmid: str): return db.get_article_by_pmid(pmid) +def get_article_by_id(id: str): + return db.get_article_by_id(id) + def update_article_by_pmid(article, pmid: str): """ This function updates an article in the database by its pmid @@ -89,46 +96,71 @@ def update_article_by_pmid(article, pmid: str): """ return db.update_article_by_pmid(article, pmid) +def update_article_by_id(article, id: str): + return db.update_article_by_id(article, id) + +# # Expire Function +# def insert_new_pmid( +# pmid: str, +# querytranslation: Optional[str] = None, +# insert_type: Optional[str] = None, +# reference_crawler_deep: Optional[int] = 0, +# cite_crawler_deep: Optional[int] = 0, +# ): +# """ +# If the article is not in the database, add it + +# :param pmid: The PMID of the article you want to insert +# :type pmid: str +# :return: The return value is the ID of the newly inserted article. 
+# """ +# # check PMID is exist +# if db.is_article_exist_by_pmid(pmid): +# logger.DEBUG("The article " + pmid + " already exists.", deep=3) +# return +# else: # Insert not exist Article +# insert_type_list = [] +# if insert_type is not None: +# insert_type_list.append(insert_type) + +# # # old version +# # a = Article(PMID = pmid, +# # State= 0, +# # QueryTranslation = querytranslation, +# # InsertType= insert_type_list, +# # ReferenceCrawlerDeep = reference_crawler_deep) +# # New version +# a = Article( +# PMID=pmid, +# State=0, +# QueryTranslation=querytranslation, +# ReferenceCrawlerDeep=reference_crawler_deep, +# CiteCrawlerDeep=cite_crawler_deep, +# ) + +# return db.add_new_article(a) + + + +def insert_new_pubmed(article:Article): + # check PMID is exist + if db.is_article_exist_by_pmid(article.PMID): + # logger.DEBUG(f"The article With PMID {article.PMID} already exists.", + # deep=3) + return + else: # Insert not exist Article + return db.add_new_article(article) -def insert_new_pmid( - pmid: str, - querytranslation: Optional[str] = None, - insert_type: Optional[str] = None, - reference_crawler_deep: Optional[int] = 0, - cite_crawler_deep: Optional[int] = 0, -): - """ - If the article is not in the database, add it - :param pmid: The PMID of the article you want to insert - :type pmid: str - :return: The return value is the ID of the newly inserted article. 
- """ - # check PMID is exist - if db.is_article_exist_by_pmid(pmid): - logger.DEBUG("The article " + pmid + " already exists.", deep=3) +def insert_new_arxiv(article:Article): + # check Arxiv ID is exist + if db.is_article_exist_by_arxiv_id(article.ArxivID): + # logger.DEBUG( + # f"The article with ArxivID {article.ArxivID} already exists.", + # deep=3) return else: # Insert not exist Article - insert_type_list = [] - if insert_type is not None: - insert_type_list.append(insert_type) - - # # old version - # a = Article(PMID = pmid, - # State= 0, - # QueryTranslation = querytranslation, - # InsertType= insert_type_list, - # ReferenceCrawlerDeep = reference_crawler_deep) - # New version - a = Article( - PMID=pmid, - State=0, - QueryTranslation=querytranslation, - ReferenceCrawlerDeep=reference_crawler_deep, - CiteCrawlerDeep=cite_crawler_deep, - ) - - return db.add_new_article(a) + return db.add_new_article(article) def get_all_article_count() -> int: diff --git a/triplea/service/repository/pipeline_core.py b/triplea/service/repository/pipeline_core.py index d7d3d28..df25764 100644 --- a/triplea/service/repository/pipeline_core.py +++ b/triplea/service/repository/pipeline_core.py @@ -4,7 +4,7 @@ import click from triplea.config.settings import SETTINGS from triplea.service.click_logger import logger -from triplea.schemas.article import Article +from triplea.schemas.article import Article, SourceBankType import triplea.service.repository.state as state_manager import triplea.service.repository.persist as persist @@ -85,40 +85,44 @@ def move_state_forward( you want to make to the API, defaults to 1 :type tps_limit: Optional[int] (optional) """ + # old version + # la = get_article_by_state(state) - # la = get_article_by_state(state) # old version - l_pmid = persist.get_article_pmid_list_by_state(state) - total_article_in_current_state = len(l_pmid) - number_of_article_move_forward = 0 - logger.DEBUG(str(len(l_pmid)) + " Article(s) is in state " + str(state)) + # old 
version 0.0.3 + # l_pmid = persist.get_article_pmid_list_by_state(state) + l_id = persist.get_article_id_list_by_state(state) + total_article_in_current_state = len(l_id) + n = 0 + logger.DEBUG(str(len(l_id)) + " Article(s) is in state " + str(state)) - bar = click.progressbar(length=len(l_pmid), show_pos=True, show_percent=True) + bar = click.progressbar(length=len(l_id), show_pos=True, show_percent=True) refresh_point = 0 - for id in l_pmid: + for id in l_id: try: - number_of_article_move_forward = number_of_article_move_forward + 1 + n = n + 1 current_state = None - if refresh_point == 500: + if refresh_point == SETTINGS.AAA_CLI_ALERT_POINT: refresh_point = 0 persist.refresh() print() logger.INFO( - f"There are {str(total_article_in_current_state - number_of_article_move_forward)} article(s) left ", # noqa: E501 + f"There are {str(total_article_in_current_state - n)} article(s) left ", # noqa: E501 forecolore="yellow", ) else: refresh_point = refresh_point + 1 - a = persist.get_article_by_pmid(id) - # a = persist.get_article_by_pmid('35970485') # CRITICAL For Test and Debug + a = persist.get_article_by_id(id) + # CRITICAL For Test and Debug + # a = persist.get_article_by_pmid('35970485') try: updated_article = Article(**a.copy()) except Exception: print() - print(logger.ERROR(f"Error in parsing article. 
PMID = {id}")) + print(logger.ERROR(f"Error in parsing article with ID = {id}")) raise Exception("Article Not Parsed.") try: @@ -126,13 +130,25 @@ def move_state_forward( except Exception: current_state = 0 - # logger.DEBUG(f"""Article {updated_article.PMID} - # with state {str(current_state)} forward to - # {str(current_state + 1)}""") + source_bank = updated_article.SourceBank + + if source_bank == None: + article_source_bank_title = "Pubmed" + article_identifier = updated_article.PMID + source_bank = SourceBankType.PUBMED + elif source_bank == SourceBankType.PUBMED: + article_source_bank_title = "Pubmed" + article_identifier = updated_article.PMID + elif source_bank == SourceBankType.ARXIV: + article_source_bank_title = "Arxiv" + article_identifier = updated_article.ArxivID + else: + raise NotImplementedError + bar.label = ( - "Article " - + updated_article.PMID - + " with state " + "Article " + article_source_bank_title + " (" + + article_identifier + + ") with state " + str(current_state) + " forward to " + str(current_state + 1) @@ -143,48 +159,31 @@ def move_state_forward( if current_state is None: updated_article = state_manager.expand_details(updated_article) - persist.update_article_by_pmid(updated_article, - updated_article.PMID) + # persist.update_article_by_pmid(updated_article, + # updated_article.PMID) elif current_state == -1: # Error in State 0 Net state: 1 updated_article = state_manager.parsing_details(updated_article) - persist.update_article_by_pmid(updated_article, - updated_article.PMID) + # persist.update_article_by_pmid(updated_article, + # updated_article.PMID) elif current_state == 0: # Net state: get article details from pubmed updated_article = state_manager.expand_details(updated_article) - persist.update_article_by_pmid(updated_article, - updated_article.PMID) + # persist.update_article_by_pmid(updated_article, + # updated_article.PMID) elif current_state == 1: # Net state: Extract Data updated_article = 
state_manager.parsing_details(updated_article) - persist.update_article_by_pmid(updated_article, - updated_article.PMID) - # # think after - # if len(l) == 1: - # pass - # else: - # logger.ERROR('Duplication has Occurred') + # persist.update_article_by_pmid(updated_article, + # updated_article.PMID) elif current_state == 2: # Net state: Get Citation updated_article = state_manager.get_citation(updated_article) - persist.update_article_by_pmid(updated_article, - updated_article.PMID) - # think after - # if len(l) == 1: - # pass - # else: - # logger.ERROR('Duplication has Occurred') + # persist.update_article_by_pmid(updated_article, + # updated_article.PMID) - elif current_state == 3: # Net state: NER Title - updated_article = state_manager.ner_title(updated_article) - persist.update_article_by_pmid(updated_article, - updated_article.PMID) - # think after - # if len(l) == 1: - # pass - # else: - # logger.ERROR('Duplication has Occurred') + persist.update_article_by_id(updated_article, + id) except Exception: if current_state == 1: @@ -231,22 +230,4 @@ def move_state_forward( persist.refresh() -if __name__ == "__main__": - logger.WARNING( - "Number of article in knowlege repository is " - + str(persist.get_all_article_count()) - ) - logger.WARNING(f"""{persist.get_all_node_count()} Node(s) - in knowlege repository.""") - logger.WARNING(f"""{persist.get_all_edge_count()} Edge(s) - in knowlege repository.""") - data = persist.get_article_group_by_state() - for i in range(-3, 7): - w = 0 - for s in data: - if s["State"] == i: - w = 1 - n = s["n"] - logger.INFO(f"{n} article(s) in state {i}.") - if w == 0: - logger.INFO(f"0 article(s) in state {i}.") + diff --git a/triplea/service/repository/state/expand_details.py b/triplea/service/repository/state/expand_details.py index 5a67043..d49852e 100644 --- a/triplea/service/repository/state/expand_details.py +++ b/triplea/service/repository/state/expand_details.py @@ -2,24 +2,53 @@ import time from triplea.client.pubmed import 
get_article_details_from_pubmed from triplea.config.settings import SETTINGS -from triplea.schemas.article import Article +from triplea.schemas.article import Article, SourceBankType from triplea.service.click_logger import logger tps_limit = SETTINGS.AAA_TPS_LIMIT -def expand_details(article: Article) -> Article: +def _expand_details_arxiv(article: Article) -> Article: article.State = 1 + # Archive is One Shot. There is no need for this step, + # although it should be checked why it is at this step + return article + + + +def _expand_details_pubmed(article: Article) -> Article: + # previous state is 0 + article.State = 1 # next state + backward_state = 0 sleep_time = 1 / tps_limit time.sleep(sleep_time) try: oa = get_article_details_from_pubmed(article.PMID) article.OreginalArticle = oa except Exception: - article.State = 0 + article.State = backward_state exc_type, exc_value, exc_tb = sys.exc_info() print() logger.ERROR(f"Error {exc_type} Value : {exc_value}") logger.ERROR(f"Error {exc_tb}") return article + + +def expand_details(article: Article) -> Article: + # this is dispatcher function + if article.SourceBank is None: + # This is Pubmed + updated_article = _expand_details_pubmed(article) + elif article.SourceBank == SourceBankType.PUBMED: + updated_article = _expand_details_pubmed(article) + elif article.SourceBank == SourceBankType.ARXIV: + updated_article = _expand_details_arxiv(article) + else: + raise NotImplementedError + + return updated_article + + + + diff --git a/triplea/service/repository/state/get_citation.py b/triplea/service/repository/state/get_citation.py index 34ed61f..71afc5e 100644 --- a/triplea/service/repository/state/get_citation.py +++ b/triplea/service/repository/state/get_citation.py @@ -1,10 +1,10 @@ import sys from triplea.client.pubmed import get_cited_article_from_pubmed -from triplea.schemas.article import Article +from triplea.schemas.article import Article, SourceBankType from triplea.service.click_logger import logger -def 
get_citation(article: Article): +def _get_citation_pubmed(article: Article): """ It takes an article, checks if the article's CiteCrawlerDeep is greater than 0, tries to get the cited articles from PubMed, @@ -26,7 +26,7 @@ def get_citation(article: Article): try: lc = get_cited_article_from_pubmed(pmid) except Exception: - article.State = 3 + article.State = -3 exc_type, exc_value, exc_tb = sys.exc_info() logger.ERROR(f"Error {exc_type} Value : {exc_value}") logger.ERROR(f"Error {exc_tb}") @@ -60,3 +60,24 @@ def get_citation(article: Article): ) return article + + +def _get_citation_arxiv(article: Article): + article.State = 3 + # I still haven't found an operational idea to get + # citations of arxiv articles, maybe through google. + return article + +def get_citation(article: Article): + # this is dispatcher function + if article.SourceBank is None: + # This is Pubmed + updated_article = _get_citation_pubmed(article) + elif article.SourceBank == SourceBankType.PUBMED: + updated_article = _get_citation_pubmed(article) + elif article.SourceBank == SourceBankType.ARXIV: + updated_article = _get_citation_arxiv(article) + else: + raise NotImplementedError + + return updated_article \ No newline at end of file diff --git a/triplea/service/repository/state/initial.py b/triplea/service/repository/state/initial.py index 24884cd..55bd072 100644 --- a/triplea/service/repository/state/initial.py +++ b/triplea/service/repository/state/initial.py @@ -1,6 +1,9 @@ import time from typing import Optional -from triplea.client.pubmed import get_article_list_from_pubmed +# from triplea.client.pubmed import get_article_list_from_pubmed + +import triplea.client.pubmed as PubmedClient +from triplea.schemas.article import Article, SourceBankType from triplea.service.click_logger import logger from triplea.config.settings import SETTINGS import triplea.service.repository.persist as persist @@ -24,12 +27,22 @@ def _save_article_pmid_list_in_arepo(data: dict) -> None: n = 0 for pmid in 
data["esearchresult"]["idlist"]: n = n + 1 - i = persist.insert_new_pmid( - pmid, - querytranslation=qt, - reference_crawler_deep=SETTINGS.AAA_REFF_CRAWLER_DEEP, - cite_crawler_deep=SETTINGS.AAA_CITED_CRAWLER_DEEP, - ) + article = Article() + article.State = 0 + article.SourceBank = SourceBankType.PUBMED + article.PMID = pmid + article.QueryTranslation = qt + article.ReferenceCrawlerDeep = SETTINGS.AAA_REFF_CRAWLER_DEEP + article.CiteCrawlerDeep = SETTINGS.AAA_CITED_CRAWLER_DEEP + + i = persist.insert_new_pubmed(article) + ## Old Approch + # i = persist.insert_new_pmid( + # pmid, + # querytranslation=qt, + # reference_crawler_deep=SETTINGS.AAA_REFF_CRAWLER_DEEP, + # cite_crawler_deep=SETTINGS.AAA_CITED_CRAWLER_DEEP, + # ) if i is None: # PMID is Duplicate logger.INFO(f"{pmid} is exist in knowledge repository. ({n})") else: @@ -63,7 +76,7 @@ def get_article_list_from_pubmed_all_store_to_arepo( :type retmax: Optional[int] (optional) """ sleep_time = 1 // tps_limit - data = get_article_list_from_pubmed(0, 2, searchterm) + data = PubmedClient.get_article_list_from_pubmed(0, 2, searchterm) total = int(data["esearchresult"]["count"]) logger.INFO("Total number of article is " + str(total)) @@ -96,7 +109,7 @@ def get_article_list_from_pubmed_all_store_to_arepo( deep=13, ) start = (i * retmax) - retmax - chunkdata = get_article_list_from_pubmed(start, retmax, searchterm) + chunkdata = PubmedClient.get_article_list_from_pubmed(start, retmax, searchterm) _save_article_pmid_list_in_arepo(chunkdata) # for last round @@ -105,7 +118,7 @@ def get_article_list_from_pubmed_all_store_to_arepo( logger.INFO(f"""Round ({str(i + 1)}): Get another {str(mid)} record (total {str(total)} record)""", deep=13) - chunkdata = get_article_list_from_pubmed(start, retmax, searchterm) + chunkdata = PubmedClient.get_article_list_from_pubmed(start, retmax, searchterm) _save_article_pmid_list_in_arepo(chunkdata) @@ -120,5 +133,5 @@ def get_article_list_from_pubmed_all_store_to_arepo( # searchterm = 
'"breast neoplasms"[MeSH Terms] OR ("breast"[All Fields] AND "neoplasms"[All Fields]) OR "breast neoplasms"[All Fields] OR ("breast"[All Fields] AND "cancer"[All Fields]) OR "breast cancer"[All Fields]' # noqa: E501 # searchterm = '((Bibliometric analysis[MeSH Terms])) OR ("Bibliometric analysis"[Title/Abstract])' # noqa: E501 searchterm = '"Rajaie Cardiovascular"[Affiliation]' - chunkdata = get_article_list_from_pubmed(start, retmax, searchterm) + chunkdata = PubmedClient.get_article_list_from_pubmed(start, retmax, searchterm) _save_article_pmid_list_in_arepo(chunkdata) diff --git a/triplea/service/repository/state/initial_arxiv.py b/triplea/service/repository/state/initial_arxiv.py new file mode 100644 index 0000000..d870218 --- /dev/null +++ b/triplea/service/repository/state/initial_arxiv.py @@ -0,0 +1,88 @@ +import json +import sys +from triplea.client.arxiv import get_article_list_from_arxiv +from triplea.schemas.article import Article, Author, SourceBankType +from triplea.service.click_logger import logger +from triplea.config.settings import SETTINGS +import triplea.service.repository.persist as persist + +import time +from typing import Optional + +def parse_arxiv_list(data:dict): + article_list = [] + try: + # Parse arxiv list into Article object list with State 1 + for a in data["feed"]["entry"]: + article = Article() + article.OreginalArticle= a + article.SourceBank = SourceBankType.ARXIV + article.QueryTranslation = data["feed"]["title"]["#text"] + article.State = 1 # Because of ArXiv API state 0,1 merge + article.ReferenceCrawlerDeep = SETTINGS.AAA_REFF_CRAWLER_DEEP + article.CiteCrawlerDeep = SETTINGS.AAA_CITED_CRAWLER_DEEP + article.ArxivID = str(a["id"]).split("arxiv.org/abs/")[-1] + + article_list.append(article) + except Exception: + exc_type, exc_value, exc_tb = sys.exc_info() + with open("error-parse_arxiv_list-{exc_tb.tb_lineno}.json", + "w") as outfile: + outfile.write(json.dumps(data, indent=4, sort_keys=True)) + outfile.close() + print() + 
+ logger.ERROR(f"Error Line {exc_tb.tb_lineno}") + logger.ERROR(f"Error {exc_value}") + + return article_list + + +def get_article_list_from_arxiv_all_store_to_arepo( + searchterm: str, + start: Optional[bool] = 1, + max_results: Optional[int] = 100, + tps_limit: Optional[int] = 1, # tps_limit = 0 no limit +) -> None: + + if tps_limit == 0: + sleep_time = 0 + else: + sleep_time = 1 // tps_limit + data = get_article_list_from_arxiv( searchterm,start , max_results) + + total = int(data["feed"]["opensearch:totalResults"]["#text"]) + logger.INFO("Total number of article is " + str(total)) + + if total == 0: + return + + article_list = parse_arxiv_list(data) + + for a in article_list: + persist.insert_new_arxiv(a) + persist.refresh() + + n = 0 + while start < total: + n= n +1 + start = start + max_results + data = get_article_list_from_arxiv(searchterm, start, max_results) + article_list = parse_arxiv_list(data) + for a in article_list: + persist.insert_new_arxiv(a) # Check Dose Not Exist + time.sleep(sleep_time) + logger.INFO( + "Round (" + + str(n) + + ") : " + + "Get another " + + str(max_results) + + " record (Total " + + str(n * max_results) + + " record)", + deep=13, + ) + + + diff --git a/triplea/service/repository/state/parsing_details.py b/triplea/service/repository/state/parsing_details.py index 715c9c8..7f58423 100644 --- a/triplea/service/repository/state/parsing_details.py +++ b/triplea/service/repository/state/parsing_details.py @@ -1,359 +1,19 @@ -from triplea.schemas.article import Affiliation, Article, Author, Keyword -from triplea.service.click_logger import logger -import triplea.service.repository.persist as persist +from triplea.schemas.article import Article, SourceBankType +from triplea.service.repository.state import (parsing_details_arxiv, + parsing_details_pubmed) -def _convert_dict_to_class_affiliation(data: dict) -> Affiliation: - """ - It takes a dictionary as input, and returns an Affiliation object - :param data: dict - :type data: dict - 
:return: an Affiliation object - """ - affiliation = Affiliation() - affiliation.Text = data["Affiliation"] - aff_part = affiliation.Text.split(",") - aff_part_number = len(aff_part) - affiliation.Part1 = aff_part[0] - affiliation.Has_Extra = False - if aff_part_number > 1: - affiliation.Part2 = aff_part[1].strip() - if aff_part_number > 2: - affiliation.Part3 = aff_part[2].strip() - if aff_part_number > 3: - affiliation.Part4 = aff_part[3].strip() - if aff_part_number > 4: - affiliation.Part5 = aff_part[4].strip() - if aff_part_number > 5: - affiliation.Part6 = aff_part[5].strip() - if aff_part_number > 6: - affiliation.Has_Extra = True - - pre_hash = ( - str(affiliation.Part1) - + str(affiliation.Part2) - + str(affiliation.Part3) - + str(affiliation.Part4) - ) - affiliation.HashID = str(hash(pre_hash)) - return affiliation - - -def _convert_dict_to_class_author(data: dict) -> Author: - """ - It takes a dictionary and returns an Author object - - :param data: dict - :type data: dict - :return: an Author object - """ - if "CollectiveName" in data: - my_author = Author() - if "#text" in data["CollectiveName"]: - my_author.FullName = data["CollectiveName"]["#text"] - else: - my_author.FullName = data["CollectiveName"] - my_author.HashID = str(hash(my_author.FullName)) - return my_author - - my_author = Author() - if "ForeName" in data: - my_author.ForeName = data["ForeName"] - my_author.LastName = data["LastName"] - my_author.FullName = str(my_author.ForeName) + " " + my_author.LastName - my_author.HashID = str(hash(my_author.FullName)) - if "Identifier" in data: - if data["Identifier"]["@Source"] == "ORCID": - my_author.ORCID = data["Identifier"]["#text"] - - if "AffiliationInfo" in data: - affiliation_list = [] - if type(data["AffiliationInfo"]) == dict: - affiliation = _convert_dict_to_class_affiliation(data["AffiliationInfo"]) - affiliation_list.append(affiliation) - elif type(data["AffiliationInfo"]) == list: - for aff in data["AffiliationInfo"]: - affiliation = 
_convert_dict_to_class_affiliation(aff) - affiliation_list.append(affiliation) - else: - raise NotImplementedError - - my_author.Affiliations = affiliation_list - - return my_author - - -def _convert_dict_to_class_keyword(data: dict) -> Keyword: - """ - It takes a dictionary and returns a Keyword object - - :param data: the dictionary that contains the keyword information - :type data: dict - :return: A Keyword object - """ - my_keyword = Keyword() - if "#text" in data: - my_keyword.Text = data["#text"] - else: - if "i" in data: - my_keyword.Text = data["i"] # in PMID 37283018 - else: # in 34358588 - print() - print("Warning in _convert_dict_to_class_keyword line 103.") - my_keyword.Text = "" - - if "," in my_keyword.Text: - pass - # logger.ERROR ('The keyword text has the character ",".') - # raise NotImplementedError - if data["@MajorTopicYN"] == "Y": - my_keyword.IS_Major = True - else: - my_keyword.IS_Major = False - my_keyword.IS_Mesh = False - return my_keyword - - -def _convert_dict_to_reffrence(): - pass - - -def parsing_details(article: Article) -> Article: # noqa: C901 - article.State = 2 - backward_state = -1 - data = article.OreginalArticle - - if data is None: - print() - logger.ERROR( - f"""Error in Original Article data. It is Null. - PMID = {article.PMID}""" - ) - article.State = backward_state - return article - - # Read Original Article Format - if "PubmedArticleSet" in data: - if data["PubmedArticleSet"] is None: - print() - logger.ERROR( - f"Error in Original Article data. It is Null. PMID = {article.PMID}" - ) - article.State = backward_state - return article - - if "PubmedArticle" in data["PubmedArticleSet"]: - PubmedData = data["PubmedArticleSet"]["PubmedArticle"]["PubmedData"] - else: - print() - article.State = backward_state - logger.ERROR( - f"Error in format Original Article data. 
PMID = {article.PMID}" - ) - return article - else: - print() - logger.ERROR("Error in format Original Article data.") - article.State = backward_state - # data= json.dumps(data, indent=4) - # with open("one-error-originalarticle.json", "w") as outfile: - # outfile.write(data) - return article - - # The below code is checking if the article has a DOI or PMC number. - # If it does, it will update the article with the DOI or PMC number. - if "ArticleIdList" in PubmedData: - ArticleId = PubmedData["ArticleIdList"]["ArticleId"] - if type(ArticleId) == list: - for a_id in ArticleId: - if a_id["@IdType"] == "doi": - article.DOI = a_id["#text"] - elif a_id["@IdType"] == "pmc": - article.PMC = a_id["#text"] - else: - pass - # print() - # print(f'article() id type unhandel: {a_id["@IdType"]}') - elif type(ArticleId) == dict: - if ArticleId["@IdType"] == "doi": - article.DOI = a_id["#text"] - elif ArticleId["@IdType"] == "pmc": - article.PMC = a_id["#text"] - else: - pass - # print() - # print(f'article id type unhandel: {a_id["@IdType"]}') - - else: - raise NotImplementedError - - # Update Article Title & Journal Title. - pubmed_article_data = data["PubmedArticleSet"]["PubmedArticle"]["MedlineCitation"][ - "Article" - ] - article.Title = pubmed_article_data["ArticleTitle"] - if type(article.Title) == dict: - article.Title = pubmed_article_data["ArticleTitle"]["#text"] - article.Journal = pubmed_article_data["Journal"]["Title"] - - # The below code is checking if the abstract is a string or a list. - # If it is a string, it will add the - # abstract to the database. If it is a list, - # it will add all the abstracts to the database. 
- if "Abstract" in pubmed_article_data: - if type(pubmed_article_data["Abstract"]) == dict: - if type(pubmed_article_data["Abstract"]["AbstractText"]) == str: - article.Abstract = pubmed_article_data["Abstract"]["AbstractText"] - elif type(pubmed_article_data["Abstract"]["AbstractText"]) == list: - abstract_all = "" - for abstract_part in pubmed_article_data["Abstract"]["AbstractText"]: - abstract_all = abstract_all + " " + abstract_part["#text"] - article.Abstract = abstract_all - elif type(pubmed_article_data["Abstract"]["AbstractText"]) == dict: - # exception happen in pmid '36497366' one-abstract-dict-mode.json - article.Abstract = pubmed_article_data["Abstract"]["AbstractText"][ - "#text" - ] - else: - t = type(pubmed_article_data["Abstract"]["AbstractText"]) - logger.ERROR(f"Type {str(t)} in Abstract Not Implemented") - raise NotImplementedError - else: - raise NotImplementedError - - # Creating a list of keywords. Merging Mesh List & Keyword List - medline_citation = data["PubmedArticleSet"]["PubmedArticle"]["MedlineCitation"] - keyword_list = [] - if "MeshHeadingList" in medline_citation: - if type(medline_citation["MeshHeadingList"]["MeshHeading"]) == list: - for mesh in medline_citation["MeshHeadingList"]["MeshHeading"]: - my_keyword = Keyword() - my_keyword.Text = mesh["DescriptorName"]["#text"] - if mesh["DescriptorName"]["@MajorTopicYN"] == "Y": - my_keyword.IS_Major = True - else: - my_keyword.IS_Major = False - # mesh['QualifierName'] # We did not get into this subject - my_keyword.IS_Mesh = True - keyword_list.append(my_keyword) - elif type(medline_citation["MeshHeadingList"]["MeshHeading"]) == dict: - my_keyword = Keyword() - mesh = medline_citation["MeshHeadingList"]["MeshHeading"] - my_keyword.Text = mesh["DescriptorName"]["#text"] - if mesh["DescriptorName"]["@MajorTopicYN"] == "Y": - my_keyword.IS_Major = True - else: - my_keyword.IS_Major = False - # mesh['QualifierName'] # We did not get into this subject - my_keyword.IS_Mesh = True - 
keyword_list.append(my_keyword) - else: - raise NotImplementedError - - if "KeywordList" in medline_citation: - if type(medline_citation["KeywordList"]["Keyword"]) == list: - for keyword in medline_citation["KeywordList"]["Keyword"]: - my_keyword = _convert_dict_to_class_keyword(keyword) - keyword_list.append(my_keyword) - elif type(medline_citation["KeywordList"]["Keyword"]) == dict: - my_keyword = _convert_dict_to_class_keyword( - medline_citation["KeywordList"]["Keyword"] - ) - keyword_list.append(my_keyword) - else: - raise NotImplementedError - - article.Keywords = keyword_list - - # The code is parsing the Article and - # extracting the references from the Mode. - if "ReferenceList" in PubmedData: - if article.ReferenceCrawlerDeep is None: - # raise Exception('ReferenceCrawlerDeep is None.') - article.ReferenceCrawlerDeep = 0 - - reference_list = [] - - if isinstance(PubmedData["ReferenceList"], list): - print(PubmedData["ReferenceList"]) - for ref in PubmedData["ReferenceList"]: - if "ArticleIdList" in ref: - if type(ref["ArticleIdList"]["ArticleId"]) == dict: - if ref["ArticleIdList"]["ArticleId"]["@IdType"] == "pubmed": - reference_list.append( - ref["ArticleIdList"]["ArticleId"]["#text"] - ) - - elif type(ref["ArticleIdList"]["ArticleId"]) == list: - for ref_id in ref["ArticleIdList"]["ArticleId"]: - if ref_id["@IdType"] == "pubmed": - reference_list.append(ref_id["#text"]) - else: - raise NotImplementedError - - else: - if type(PubmedData["ReferenceList"]["Reference"]) == dict: - ref = PubmedData["ReferenceList"]["Reference"] - if "ArticleIdList" in ref: - if type(ref["ArticleIdList"]["ArticleId"]) == dict: - if ref["ArticleIdList"]["ArticleId"]["@IdType"] == "pubmed": - reference_list.append( - ref["ArticleIdList"]["ArticleId"]["#text"] - ) - - elif type(ref["ArticleIdList"]["ArticleId"]) == list: - for ref_id in ref["ArticleIdList"]["ArticleId"]: - if ref_id["@IdType"] == "pubmed": - reference_list.append(ref_id["#text"]) - else: - raise 
NotImplementedError - else: - for ref in PubmedData["ReferenceList"]["Reference"]: - if "ArticleIdList" in ref: - if type(ref["ArticleIdList"]["ArticleId"]) == dict: - if ref["ArticleIdList"]["ArticleId"]["@IdType"] == "pubmed": - reference_list.append( - ref["ArticleIdList"]["ArticleId"]["#text"] - ) - - elif type(ref["ArticleIdList"]["ArticleId"]) == list: - for ref_id in ref["ArticleIdList"]["ArticleId"]: - if ref_id["@IdType"] == "pubmed": - reference_list.append(ref_id["#text"]) - else: - raise NotImplementedError - - article.References = reference_list - - if article.ReferenceCrawlerDeep > 0: - # Create new article from References List - logger.DEBUG( - f"Add {len(reference_list)} new article(s) by REFERENCE. ", - forecolore="yellow", - deep=3, - ) - new_rcd = article.ReferenceCrawlerDeep - 1 - for ref_pmid in reference_list: - persist.insert_new_pmid(pmid=ref_pmid, reference_crawler_deep=new_rcd) - - if "AuthorList" in pubmed_article_data: - author_list = [] - if type(pubmed_article_data["AuthorList"]["Author"]) == list: - for author in pubmed_article_data["AuthorList"]["Author"]: - my_author = _convert_dict_to_class_author(author) - author_list.append(my_author) - elif type(pubmed_article_data["AuthorList"]["Author"]) == dict: - my_author = _convert_dict_to_class_author( - pubmed_article_data["AuthorList"]["Author"] - ) - author_list.append(my_author) - else: - raise NotImplementedError - article.Authors = author_list +def parsing_details(article: Article) -> Article: + # this is dispatcher function + if article.SourceBank is None: + # This is Pubmed + updated_article = parsing_details_pubmed(article) + elif article.SourceBank == SourceBankType.PUBMED: + updated_article = parsing_details_pubmed(article) + elif article.SourceBank == SourceBankType.ARXIV: + updated_article = parsing_details_arxiv(article) else: - logger.WARNING( - f"Article {article.PMID} has no AuthorList", forecolore="white", deep=5 - ) - - return article + raise NotImplementedError + + 
return updated_article
\ No newline at end of file
diff --git a/triplea/service/repository/state/parsing_details_arxiv.py b/triplea/service/repository/state/parsing_details_arxiv.py
new file mode 100644
index 0000000..b3d8db8
--- /dev/null
+++ b/triplea/service/repository/state/parsing_details_arxiv.py
@@ -0,0 +1,60 @@
+
+
+
+import sys
+from triplea.schemas.article import Article, Author
+from triplea.service.click_logger import logger
+import triplea.service.repository.persist as persist
+
+
+def _parse_arxiv_author(single_author_dict:dict)-> Author:
+    a = Author()
+    a.FullName = single_author_dict["name"]
+    # TODO affiliation
+    return a
+
+def parsing_details_arxiv(article: Article) -> Article:
+    # current state may be 1
+    article.State = 2
+    backward_state = -1
+    data = article.OreginalArticle
+
+    if data is None:
+        print()
+        logger.ERROR(
+            f"""Error in Original Article data. It is Null.
+              PMID = {article.PMID}"""
+        )
+        article.State = backward_state
+        return article
+
+    try:
+        article.Journal = "Arxiv"
+        article.Title = str(data["title"]).replace("\n", " ")
+        article.Abstract = str(data["summary"]).replace("\n", " ")
+
+        if isinstance(data["author"],list):
+            article_author_list =[]
+            for auth in data["author"]:
+                article_author_list.append(_parse_arxiv_author(auth))
+        else:
+            article_author_list =[]
+            article_author_list.append(_parse_arxiv_author(data["author"]))
+
+        article.Authors = article_author_list
+
+        article.Published = data["published"]
+        # This helped:
+        # http://lukasschwab.me/arxiv.py/arxiv.html#Result.get_short_id
+
+        # TODO DOI
+        return article
+    except Exception:
+        article.State = backward_state
+        exc_type, exc_value, exc_tb = sys.exc_info()
+        print()
+
+        logger.ERROR(f"Error Line {exc_tb.tb_lineno}")
+        logger.ERROR(f"Error {exc_value}")
+        return article
+
diff --git a/triplea/service/repository/state/parsing_details_pubmed.py b/triplea/service/repository/state/parsing_details_pubmed.py
new file mode 100644
index 0000000..7e681cd
--- 
/dev/null +++ b/triplea/service/repository/state/parsing_details_pubmed.py @@ -0,0 +1,360 @@ +from triplea.schemas.article import Affiliation, Article, Author, Keyword +from triplea.service.click_logger import logger +import triplea.service.repository.persist as persist + + +def _convert_dict_to_class_affiliation(data: dict) -> Affiliation: + """ + It takes a dictionary as input, and returns an Affiliation object + + :param data: dict + :type data: dict + :return: an Affiliation object + """ + affiliation = Affiliation() + affiliation.Text = data["Affiliation"] + aff_part = affiliation.Text.split(",") + aff_part_number = len(aff_part) + affiliation.Part1 = aff_part[0] + affiliation.Has_Extra = False + if aff_part_number > 1: + affiliation.Part2 = aff_part[1].strip() + if aff_part_number > 2: + affiliation.Part3 = aff_part[2].strip() + if aff_part_number > 3: + affiliation.Part4 = aff_part[3].strip() + if aff_part_number > 4: + affiliation.Part5 = aff_part[4].strip() + if aff_part_number > 5: + affiliation.Part6 = aff_part[5].strip() + if aff_part_number > 6: + affiliation.Has_Extra = True + + pre_hash = ( + str(affiliation.Part1) + + str(affiliation.Part2) + + str(affiliation.Part3) + + str(affiliation.Part4) + ) + affiliation.HashID = str(hash(pre_hash)) + return affiliation + + +def _convert_dict_to_class_author(data: dict) -> Author: + """ + It takes a dictionary and returns an Author object + + :param data: dict + :type data: dict + :return: an Author object + """ + if "CollectiveName" in data: + my_author = Author() + if "#text" in data["CollectiveName"]: + my_author.FullName = data["CollectiveName"]["#text"] + else: + my_author.FullName = data["CollectiveName"] + my_author.HashID = str(hash(my_author.FullName)) + return my_author + + my_author = Author() + if "ForeName" in data: + my_author.ForeName = data["ForeName"] + my_author.LastName = data["LastName"] + my_author.FullName = str(my_author.ForeName) + " " + my_author.LastName + my_author.HashID = 
str(hash(my_author.FullName)) + if "Identifier" in data: + if data["Identifier"]["@Source"] == "ORCID": + my_author.ORCID = data["Identifier"]["#text"] + + if "AffiliationInfo" in data: + affiliation_list = [] + if type(data["AffiliationInfo"]) == dict: + affiliation = _convert_dict_to_class_affiliation(data["AffiliationInfo"]) + affiliation_list.append(affiliation) + elif type(data["AffiliationInfo"]) == list: + for aff in data["AffiliationInfo"]: + affiliation = _convert_dict_to_class_affiliation(aff) + affiliation_list.append(affiliation) + else: + raise NotImplementedError + + my_author.Affiliations = affiliation_list + + return my_author + + +def _convert_dict_to_class_keyword(data: dict) -> Keyword: + """ + It takes a dictionary and returns a Keyword object + + :param data: the dictionary that contains the keyword information + :type data: dict + :return: A Keyword object + """ + my_keyword = Keyword() + if "#text" in data: + my_keyword.Text = data["#text"] + else: + if "i" in data: + my_keyword.Text = data["i"] # in PMID 37283018 + else: # in 34358588 + print() + print("Warning in _convert_dict_to_class_keyword line 103.") + my_keyword.Text = "" + + if "," in my_keyword.Text: + pass + # logger.ERROR ('The keyword text has the character ",".') + # raise NotImplementedError + if data["@MajorTopicYN"] == "Y": + my_keyword.IS_Major = True + else: + my_keyword.IS_Major = False + my_keyword.IS_Mesh = False + return my_keyword + + +def _convert_dict_to_reffrence(): + pass + + +def parsing_details_pubmed(article: Article) -> Article: # noqa: C901 + # current state may be 1 + article.State = 2 + backward_state = -1 + data = article.OreginalArticle + + if data is None: + print() + logger.ERROR( + f"""Error in Original Article data. It is Null. 
+              PMID = {article.PMID}"""
+        )
+        article.State = backward_state
+        return article
+
+    # Read Original Article Format
+    if "PubmedArticleSet" in data:
+        if data["PubmedArticleSet"] is None:
+            print()
+            logger.ERROR(
+                f"Error in Original Article data. It is Null. PMID = {article.PMID}"
+            )
+            article.State = backward_state
+            return article
+
+        if "PubmedArticle" in data["PubmedArticleSet"]:
+            PubmedData = data["PubmedArticleSet"]["PubmedArticle"]["PubmedData"]
+        else:
+            print()
+            article.State = backward_state
+            logger.ERROR(
+                f"Error in format Original Article data. PMID = {article.PMID}"
+            )
+            return article
+    else:
+        print()
+        logger.ERROR("Error in format Original Article data.")
+        article.State = backward_state
+        # data= json.dumps(data, indent=4)
+        # with open("one-error-originalarticle.json", "w") as outfile:
+        # outfile.write(data)
+        return article
+
+    # The below code is checking if the article has a DOI or PMC number.
+    # If it does, it will update the article with the DOI or PMC number.
+    if "ArticleIdList" in PubmedData:
+        ArticleId = PubmedData["ArticleIdList"]["ArticleId"]
+        if type(ArticleId) == list:
+            for a_id in ArticleId:
+                if a_id["@IdType"] == "doi":
+                    article.DOI = a_id["#text"]
+                elif a_id["@IdType"] == "pmc":
+                    article.PMC = a_id["#text"]
+                else:
+                    pass
+                    # print()
+                    # print(f'article() id type unhandled: {a_id["@IdType"]}')
+        elif type(ArticleId) == dict:
+            if ArticleId["@IdType"] == "doi":
+                article.DOI = ArticleId["#text"]
+            elif ArticleId["@IdType"] == "pmc":
+                article.PMC = ArticleId["#text"]
+            else:
+                pass
+                # print()
+                # print(f'article id type unhandled: {a_id["@IdType"]}')
+
+    else:
+        raise NotImplementedError
+
+    # Update Article Title & Journal Title. 
+ pubmed_article_data = data["PubmedArticleSet"]["PubmedArticle"]["MedlineCitation"][ + "Article" + ] + article.Title = pubmed_article_data["ArticleTitle"] + if type(article.Title) == dict: + article.Title = pubmed_article_data["ArticleTitle"]["#text"] + article.Journal = pubmed_article_data["Journal"]["Title"] + + # The below code is checking if the abstract is a string or a list. + # If it is a string, it will add the + # abstract to the database. If it is a list, + # it will add all the abstracts to the database. + if "Abstract" in pubmed_article_data: + if type(pubmed_article_data["Abstract"]) == dict: + if type(pubmed_article_data["Abstract"]["AbstractText"]) == str: + article.Abstract = pubmed_article_data["Abstract"]["AbstractText"] + elif type(pubmed_article_data["Abstract"]["AbstractText"]) == list: + abstract_all = "" + for abstract_part in pubmed_article_data["Abstract"]["AbstractText"]: + abstract_all = abstract_all + " " + abstract_part["#text"] + article.Abstract = abstract_all + elif type(pubmed_article_data["Abstract"]["AbstractText"]) == dict: + # exception happen in pmid '36497366' one-abstract-dict-mode.json + article.Abstract = pubmed_article_data["Abstract"]["AbstractText"][ + "#text" + ] + else: + t = type(pubmed_article_data["Abstract"]["AbstractText"]) + logger.ERROR(f"Type {str(t)} in Abstract Not Implemented") + raise NotImplementedError + else: + raise NotImplementedError + + # Creating a list of keywords. 
Merging Mesh List & Keyword List + medline_citation = data["PubmedArticleSet"]["PubmedArticle"]["MedlineCitation"] + keyword_list = [] + if "MeshHeadingList" in medline_citation: + if type(medline_citation["MeshHeadingList"]["MeshHeading"]) == list: + for mesh in medline_citation["MeshHeadingList"]["MeshHeading"]: + my_keyword = Keyword() + my_keyword.Text = mesh["DescriptorName"]["#text"] + if mesh["DescriptorName"]["@MajorTopicYN"] == "Y": + my_keyword.IS_Major = True + else: + my_keyword.IS_Major = False + # mesh['QualifierName'] # We did not get into this subject + my_keyword.IS_Mesh = True + keyword_list.append(my_keyword) + elif type(medline_citation["MeshHeadingList"]["MeshHeading"]) == dict: + my_keyword = Keyword() + mesh = medline_citation["MeshHeadingList"]["MeshHeading"] + my_keyword.Text = mesh["DescriptorName"]["#text"] + if mesh["DescriptorName"]["@MajorTopicYN"] == "Y": + my_keyword.IS_Major = True + else: + my_keyword.IS_Major = False + # mesh['QualifierName'] # We did not get into this subject + my_keyword.IS_Mesh = True + keyword_list.append(my_keyword) + else: + raise NotImplementedError + + if "KeywordList" in medline_citation: + if type(medline_citation["KeywordList"]["Keyword"]) == list: + for keyword in medline_citation["KeywordList"]["Keyword"]: + my_keyword = _convert_dict_to_class_keyword(keyword) + keyword_list.append(my_keyword) + elif type(medline_citation["KeywordList"]["Keyword"]) == dict: + my_keyword = _convert_dict_to_class_keyword( + medline_citation["KeywordList"]["Keyword"] + ) + keyword_list.append(my_keyword) + else: + raise NotImplementedError + + article.Keywords = keyword_list + + # The code is parsing the Article and + # extracting the references from the Mode. 
+ if "ReferenceList" in PubmedData: + if article.ReferenceCrawlerDeep is None: + # raise Exception('ReferenceCrawlerDeep is None.') + article.ReferenceCrawlerDeep = 0 + + reference_list = [] + + if isinstance(PubmedData["ReferenceList"], list): + print(PubmedData["ReferenceList"]) + for ref in PubmedData["ReferenceList"]: + if "ArticleIdList" in ref: + if type(ref["ArticleIdList"]["ArticleId"]) == dict: + if ref["ArticleIdList"]["ArticleId"]["@IdType"] == "pubmed": + reference_list.append( + ref["ArticleIdList"]["ArticleId"]["#text"] + ) + + elif type(ref["ArticleIdList"]["ArticleId"]) == list: + for ref_id in ref["ArticleIdList"]["ArticleId"]: + if ref_id["@IdType"] == "pubmed": + reference_list.append(ref_id["#text"]) + else: + raise NotImplementedError + + else: + if type(PubmedData["ReferenceList"]["Reference"]) == dict: + ref = PubmedData["ReferenceList"]["Reference"] + if "ArticleIdList" in ref: + if type(ref["ArticleIdList"]["ArticleId"]) == dict: + if ref["ArticleIdList"]["ArticleId"]["@IdType"] == "pubmed": + reference_list.append( + ref["ArticleIdList"]["ArticleId"]["#text"] + ) + + elif type(ref["ArticleIdList"]["ArticleId"]) == list: + for ref_id in ref["ArticleIdList"]["ArticleId"]: + if ref_id["@IdType"] == "pubmed": + reference_list.append(ref_id["#text"]) + else: + raise NotImplementedError + else: + for ref in PubmedData["ReferenceList"]["Reference"]: + if "ArticleIdList" in ref: + if type(ref["ArticleIdList"]["ArticleId"]) == dict: + if ref["ArticleIdList"]["ArticleId"]["@IdType"] == "pubmed": + reference_list.append( + ref["ArticleIdList"]["ArticleId"]["#text"] + ) + + elif type(ref["ArticleIdList"]["ArticleId"]) == list: + for ref_id in ref["ArticleIdList"]["ArticleId"]: + if ref_id["@IdType"] == "pubmed": + reference_list.append(ref_id["#text"]) + else: + raise NotImplementedError + + article.References = reference_list + + if article.ReferenceCrawlerDeep > 0: + # Create new article from References List + logger.DEBUG( + f"Add 
{len(reference_list)} new article(s) by REFERENCE. ", + forecolore="yellow", + deep=3, + ) + new_rcd = article.ReferenceCrawlerDeep - 1 + for ref_pmid in reference_list: + persist.insert_new_pmid(pmid=ref_pmid, reference_crawler_deep=new_rcd) + + if "AuthorList" in pubmed_article_data: + author_list = [] + if type(pubmed_article_data["AuthorList"]["Author"]) == list: + for author in pubmed_article_data["AuthorList"]["Author"]: + my_author = _convert_dict_to_class_author(author) + author_list.append(my_author) + elif type(pubmed_article_data["AuthorList"]["Author"]) == dict: + my_author = _convert_dict_to_class_author( + pubmed_article_data["AuthorList"]["Author"] + ) + author_list.append(my_author) + else: + raise NotImplementedError + article.Authors = author_list + else: + logger.WARNING( + f"Article {article.PMID} has no AuthorList", forecolore="white", deep=5 + ) + + return article diff --git a/triplea/the_private_backyard3.py b/triplea/the_private_backyard3.py index e5827c9..080b044 100644 --- a/triplea/the_private_backyard3.py +++ b/triplea/the_private_backyard3.py @@ -1,6 +1,11 @@ # flake8: noqa # noqa: F401 +import json +from triplea.client.arxiv import get_article_list_from_arxiv +from triplea.schemas.article import Article, Author +from triplea.service.repository import persist +from triplea.service.repository.pipeline_core import move_state_forward from triplea.service.repository.state.initial import get_article_list_from_pubmed_all_store_to_arepo import array @@ -18,10 +23,46 @@ get_clustering_coefficient_per_node, ) import networkx as nx +from triplea.service.repository.state.initial_arxiv import get_article_list_from_arxiv_all_store_to_arepo from triplea.utils.general import safe_csv + + + + + if __name__ == "__main__": - term = '("Large Language Models"[Title/Abstract]) OR ("Large Language Model"[Title/Abstract]) OR (LLM[Title/Abstract]) OR (LLMs[Title/Abstract]) OR (ChatGPT[Title/Abstract])' - get_article_list_from_pubmed_all_store_to_arepo(term) + 
pass + # term = '("Large Language Models"[Title/Abstract]) OR ("Large Language Model"[Title/Abstract]) OR (LLM[Title/Abstract]) OR (LLMs[Title/Abstract]) OR (ChatGPT[Title/Abstract])' + # get_article_list_from_pubmed_all_store_to_arepo(term) + + # r = get_article_list_from_arxiv("all:model", 1 ,3) + # with open("temp-arxiv.json", "w") as outfile: + # outfile.write(json.dumps(r, indent=4, sort_keys=True)) + # outfile.close() + + # get_article_list_from_arxiv_all_store_to_arepo("all:electron", 1 ,3) + + # from triplea.config.settings import SETTINGS + # import json + # import triplea.utils as Utils + + # f = open('temp-arxiv.json') + # data = json.load(f) + # a = int (data["feed"]["opensearch:totalResults"]["#text"]) + # print(data["feed"]["opensearch:totalResults"]["#text"]) + # print(a+1) + # import urllib.parse + + # text= 'ti:"large language model" OR abs:"medical"' + # text= urllib.parse.quote(text) + # get_article_list_from_arxiv_all_store_to_arepo(text,20,10) + + data = persist.get_article_group_by_state() + print(data) + + # move_state_forward(-1) + + diff --git a/triplea/utils/__init__.py b/triplea/utils/__init__.py index 80888d7..526aa12 100644 --- a/triplea/utils/__init__.py +++ b/triplea/utils/__init__.py @@ -1,5 +1,7 @@ from triplea.utils.general import safe_csv + __all__ = [ "safe_csv", + ]