From 3e5f93612743d327c3ffe65a4515d086e59301cd Mon Sep 17 00:00:00 2001
From: EhsanBitaraf
Date: Sat, 30 Dec 2023 17:27:26 +0330
Subject: [PATCH] add `get_article_by_id` to replace `get_article_by_pmid`

---
 CHANGELOG.md                                      |  10 +
 README.md                                         |  14 +-
 export                                            |   1 +
 export_authors                                    |   1 +
 export_keywords                                   |   1 +
 export_topics                                     |   1 +
 jupyter_lab/database/Arxiv_test.json              |   1 -
 jupyter_lab/pipeline.ipynb                        | 237 +++++++++++++++---
 pyproject.toml                                    |   2 +-
 triplea/cli/arepo.py                              |   2 +-
 triplea/client/triple_extraction.py               |   5 +-
 triplea/db/mongodb.py                             |  80 ++++--
 triplea/db/tinydb.py                              |  22 ++
 .../repository/export/triplea_format.py           |  12 +-
 triplea/service/repository/persist.py             |  66 ++++-
 triplea/service/repository/pipeline_core.py       |  15 +-
 triplea/service/repository/pipeline_flag.py       | 183 ++++++--------
 .../state/custom/extract_kg_abstract.py           |   6 +-
 .../repository/state/expand_details.py            |   5 +-
 triplea/service/repository/state/get_citation.py  |  10 +-
 triplea/service/repository/state/initial_arxiv.py |   7 +
 .../repository/state/parsing_details.py           |   5 +-
 .../repository/state/parsing_details_arxiv.py     |   2 +-
 .../state/parsing_details_pubmed.py               |   2 +-
 triplea/the_private_backyard2.py                  |  65 +++--
 triplea/the_private_backyard3.py                  |   6 +-
 triplea/utils/general.py                          |  19 ++
 27 files changed, 563 insertions(+), 217 deletions(-)
 create mode 100644 export
 create mode 100644 export_authors
 create mode 100644 export_keywords
 create mode 100644 export_topics
 delete mode 100644 jupyter_lab/database/Arxiv_test.json

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 56406e4..2ccbf8f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,11 +2,21 @@
 All notable changes to this project will be documented in this file.
 
 ## v0.0.5 2023-12-28
+
+### Task
+- Add `get_article_id_list_by_cstate` to replace `get_article_pmid_list_by_cstate`
+- Add `get_article_by_id` to replace `get_article_by_pmid`
+- Add `get_all_article_id_list` to replace `get_all_article_pmid_list`
+- `move_state_forward` may raise an error in TinyDB
+- Check all TinyDB operations
+
 ### Improvements
+- Add `print_error` in `utils.general` for unified error printing
 - Add Published, ArxivID, SourceBank fields in Article
 
 ### Bug Fixes
+- Fix session handling in `extract_triple`
 
 ## v0.0.4 2023-10-14
 ### Improvements

diff --git a/README.md b/README.md
index 1990f56..de4346d 100644
--- a/README.md
+++ b/README.md
@@ -210,7 +210,7 @@ output:
 
 #### Get and Save list of article identifiers based on search term
 
-Get list of article identifier (PMID) base on search term and save into knowledge repository in first state (0):
+Get a list of article identifiers, such as PMID, based on a search term and save them into the knowledge repository in the first state (0):
 
 use this command:
 ```shell
@@ -234,13 +234,11 @@
 The preparation of the article for extracting the graph has several steps; we call these steps the states of the article.
 
 |State|Short Description|Description|
 |-----|-----------------|-----------|
 |0 |article identifier saved|At this stage, the article object stored in the data bank has only one identifier, such as the PMID or DOI identifier|
-|1 |article details article info saved (json Form)|Metadata related to the article is stored in the `OreginalArticle` field from the `SourceBank`, but it has not been parsed yet|
-|2 |parse details info||
-|3 |Get Citation||
-
-|-1 |Error|if error happend in move state 1 to 2|
-
+|1 |article details info saved (JSON form)|Metadata related to the article is stored in the `OriginalArticle` field from the `SourceBank`, but it has not been parsed yet|
+|2 |parse details info|The contents of the `OriginalArticle` field are parsed and placed in the fields of the Article object.|
+|3 |Get Citation ||
+|-1 |Error |if an error happened in moving from state 1 to 2|
+|-2 |Error |if an error happened in moving from state 2 to 3|
 
 There are two ways to run a pipeline. In the first method, we give the number of an existing state, and all the articles in that state move forward one state. In the second method, we give the final state number, and each article below that state keeps moving until it reaches the final state we specified.

diff --git a/export b/export
new file mode 100644
index 0000000..047d635
--- /dev/null
+++ b/export
@@ -0,0 +1 @@
+key,title,pmid,year,publisher,url,abstract,state,doi,journal_issn,journal_iso_abbreviation,language,publication_type,citation

diff --git a/export_authors b/export_authors
new file mode 100644
index 0000000..8dfe994
--- /dev/null
+++ b/export_authors
@@ -0,0 +1 @@
+key,authors,affiliations,country,university,institute,center,hospital,department,location,email,zipcode

diff --git a/export_keywords b/export_keywords
new file mode 100644
index 0000000..be7a4b8
--- /dev/null
+++ b/export_keywords
@@ -0,0 +1 @@
+key,keywords

diff --git a/export_topics b/export_topics
new file mode 100644
index 0000000..ed07582
--- /dev/null
+++ b/export_topics
@@ -0,0 +1 @@
+key,topics,rank

diff --git a/jupyter_lab/database/Arxiv_test.json b/jupyter_lab/database/Arxiv_test.json
deleted file mode 100644
index c65a72c..0000000
--- a/jupyter_lab/database/Arxiv_test.json
+++ /dev/null
@@ -1 +0,0 @@
-{"_default": {"1": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}}}
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "24": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "25": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "26": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "27": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "28": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "29": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "30": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "31": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "32": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "33": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "34": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "35": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "36": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "37": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "38": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "39": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "40": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "41": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "42": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "43": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "44": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null},
[... entries "45" through "70" elided: each is a verbatim duplicate of entry "44" above (ArxivID "2311.15180v1", "Benchmarking Large Language Model Volatility", SourceBank 2, State -1) ...]
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "71": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "72": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "73": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "74": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "75": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "76": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "77": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "78": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "79": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "80": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "81": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "82": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "83": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "84": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "85": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "86": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "87": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "88": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "89": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "90": {"Abstract": null, "ArxivID": "2311.15180v1", "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"arxiv:comment": {"#text": "7 pages, 2 figures, Workshop on AI Safety and Robustness In Finance,\n ICAIF 2023", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "arxiv:primary_category": {"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR", "@xmlns:arxiv": "http://arxiv.org/schemas/atom"}, "author": {"name": "Boyang Yu"}, "category": [{"@scheme": "http://arxiv.org/schemas/atom", "@term": "q-fin.TR"}, {"@scheme": "http://arxiv.org/schemas/atom", "@term": "cs.CL"}], "id": "http://arxiv.org/abs/2311.15180v1", "link": [{"@href": "http://arxiv.org/abs/2311.15180v1", "@rel": "alternate", "@type": "text/html"}, {"@href": "http://arxiv.org/pdf/2311.15180v1", "@rel": "related", "@title": "pdf", "@type": "application/pdf"}], "published": "2023-11-26T03:54:03Z", "summary": "The impact of non-deterministic outputs from Large Language Models (LLMs) is\nnot well examined for financial text understanding tasks. Through a compelling\ncase study on investing in the US equity market via news sentiment analysis, we\nuncover substantial variability in sentence-level sentiment classification\nresults, underscoring the innate volatility of LLM outputs. These uncertainties\ncascade downstream, leading to more significant variations in portfolio\nconstruction and return. While tweaking the temperature parameter in the\nlanguage model decoder presents a potential remedy, it comes at the expense of\nstifled creativity. Similarly, while ensembling multiple outputs mitigates the\neffect of volatile outputs, it demands a notable computational investment. 
This\nwork furnishes practitioners with invaluable insights for adeptly navigating\nuncertainty in the integration of LLMs into financial decision-making,\nparticularly in scenarios dictated by non-deterministic information.", "title": "Benchmarking Large Language Model Volatility", "updated": "2023-11-26T03:54:03Z"}, "PMC": null, "PMID": null, "Published": null, "QueryTranslation": "ArXiv Query: search_query=ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark&id_list=&start=0&max_results=10", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 2, "State": -1, "Title": null, "Topics": null}, "115": {"Abstract": null, "ArxivID": null, "Authors": null, "CiteCrawlerDeep": 0, "CitedBy": null, "DOI": null, "FlagAffiliationMining": null, "FlagExtractKG": null, "FlagExtractTopic": null, "InsertType": null, "Journal": null, "Keywords": null, "NamedEntities": null, "OreginalArticle": {"PubmedArticleSet": {"PubmedArticle": {"MedlineCitation": {"@IndexingMethod": "Automated", "@Owner": "NLM", "@Status": "Publisher", "Article": {"@PubModel": "Print-Electronic", "Abstract": {"AbstractText": [{"#text": "To analyze the quality and readability of information regarding shoulder stabilization surgery available using an online AI software (ChatGPT), using standardized scoring systems, as well as to report on the given answers by the AI.", "@Label": "PURPOSE", "@NlmCategory": "OBJECTIVE"}, {"#text": "An open AI model (ChatGPT) was used to answer 23 commonly asked questions from patients on shoulder stabilization surgery. These answers were evaluated for medical accuracy, quality, and readability using The JAMA Benchmark criteria, DISCERN score, Flesch-Kincaid Reading Ease Score (FRES) & Grade Level (FKGL).", "@Label": "METHODS", "@NlmCategory": "METHODS"}, {"#text": "The JAMA Benchmark criteria score was 0, which is the lowest score, indicating no reliable resources cited. The DISCERN score was 60, which is considered a good score. The areas that open AI model did not achieve full marks were also related to the lack of available source material used to compile the answers, and finally some shortcomings with information not fully supported by the literature. The FRES was 26.2, and the FKGL was considered to be that of a college graduate.", "@Label": "RESULTS", "@NlmCategory": "RESULTS"}, {"#text": "There was generally high quality in the answers given on questions relating to shoulder stabilization surgery, but there was a high reading level required to comprehend the information presented. However, it is unclear where the answers came from with no source material cited. It is important to note that the ChatGPT software repeatedly references the need to discuss these questions with an orthopaedic surgeon and the importance of shared discussion making, as well as compliance with surgeon treatment recommendations.", "@Label": "CONCLUSION", "@NlmCategory": "CONCLUSIONS"}, {"#text": "As shoulder instability is an injury that predominantly affects younger individuals who may use the Internet for information, this study shows what information patients may be getting online.", "@Label": "CLINICAL RELEVANCE", "@NlmCategory": "CONCLUSIONS"}], "CopyrightInformation": "Copyright \u00a9 2023 Arthroscopy Association of North America. Published by Elsevier Inc. 
All rights reserved."}, "ArticleDate": {"@DateType": "Electronic", "Day": "09", "Month": "08", "Year": "2023"}, "ArticleTitle": "Evaluation High-Quality of Information from ChatGPT (Artificial Intelligence-Large Language Model) Artificial Intelligence on Shoulder Stabilization Surgery.", "AuthorList": {"@CompleteYN": "Y", "Author": [{"@ValidYN": "Y", "AffiliationInfo": {"Affiliation": "Duke University, Durham, North Carolina, U.S.A.. Electronic address: eoghan.hurley@duke.edu."}, "ForeName": "Eoghan T", "Initials": "ET", "LastName": "Hurley"}, {"@ValidYN": "Y", "AffiliationInfo": {"Affiliation": "Duke University, Durham, North Carolina, U.S.A."}, "ForeName": "Bryan S", "Initials": "BS", "LastName": "Crook"}, {"@ValidYN": "Y", "AffiliationInfo": {"Affiliation": "Duke University, Durham, North Carolina, U.S.A."}, "ForeName": "Samuel G", "Initials": "SG", "LastName": "Lorentz"}, {"@ValidYN": "Y", "AffiliationInfo": {"Affiliation": "Duke University, Durham, North Carolina, U.S.A."}, "ForeName": "Richard M", "Initials": "RM", "LastName": "Danilkowicz"}, {"@ValidYN": "Y", "AffiliationInfo": {"Affiliation": "Duke University, Durham, North Carolina, U.S.A."}, "ForeName": "Brian C", "Initials": "BC", "LastName": "Lau"}, {"@ValidYN": "Y", "AffiliationInfo": {"Affiliation": "Duke University, Durham, North Carolina, U.S.A."}, "ForeName": "Dean C", "Initials": "DC", "LastName": "Taylor"}, {"@ValidYN": "Y", "AffiliationInfo": {"Affiliation": "Duke University, Durham, North Carolina, U.S.A."}, "ForeName": "Jonathan F", "Initials": "JF", "LastName": "Dickens"}, {"@ValidYN": "Y", "AffiliationInfo": {"Affiliation": "Duke University, Durham, North Carolina, U.S.A."}, "ForeName": "Oke", "Initials": "O", "LastName": "Anakwenze"}, {"@ValidYN": "Y", "AffiliationInfo": {"Affiliation": "Duke University, Durham, North Carolina, U.S.A."}, "ForeName": "Christopher S", "Initials": "CS", "LastName": "Klifto"}]}, "ELocationID": [{"#text": "S0749-8063(23)00642-4", "@EIdType": "pii", "@ValidYN": "Y"}, {"#text": "10.1016/j.arthro.2023.07.048", "@EIdType": "doi", "@ValidYN": "Y"}], "Journal": {"ISOAbbreviation": "Arthroscopy", "ISSN": {"#text": "1526-3231", "@IssnType": "Electronic"}, "JournalIssue": {"@CitedMedium": "Internet", "PubDate": {"Day": "09", "Month": "Aug", "Year": "2023"}}, "Title": "Arthroscopy : the journal of arthroscopic & related surgery : official publication of the Arthroscopy Association of North America and the International Arthroscopy Association"}, "Language": "eng", "PublicationTypeList": {"PublicationType": {"#text": "Journal Article", "@UI": "D016428"}}}, "CitationSubset": "IM", "DateRevised": {"Day": "23", "Month": "08", "Year": "2023"}, "MedlineJournalInfo": {"Country": "United States", "ISSNLinking": "0749-8063", "MedlineTA": "Arthroscopy", "NlmUniqueID": "8506498"}, "PMID": {"#text": "37567487", "@Version": "1"}}, "PubmedData": {"ArticleIdList": {"ArticleId": [{"#text": "37567487", "@IdType": "pubmed"}, {"#text": "10.1016/j.arthro.2023.07.048", "@IdType": "doi"}, {"#text": "S0749-8063(23)00642-4", "@IdType": "pii"}]}, "History": {"PubMedPubDate": [{"@PubStatus": "received", "Day": "29", "Month": "3", "Year": "2023"}, {"@PubStatus": "revised", "Day": "27", "Month": "6", "Year": "2023"}, {"@PubStatus": "accepted", "Day": "28", "Month": "7", "Year": "2023"}, {"@PubStatus": "pubmed", "Day": "12", "Hour": "10", "Minute": "42", "Month": "8", "Year": "2023"}, {"@PubStatus": "medline", "Day": "12", "Hour": "10", "Minute": "42", "Month": "8", "Year": "2023"}, {"@PubStatus": "entrez", "Day": "11", 
"Hour": "19", "Minute": "27", "Month": "8", "Year": "2023"}]}, "PublicationStatus": "aheadofprint"}}}}, "PMC": null, "PMID": "37567487", "Published": null, "QueryTranslation": "\"large language model\"[Title] AND \"Benchmark\"[Title/Abstract]", "ReferenceCrawlerDeep": 0, "References": null, "SourceBank": 1, "State": -1, "Title": null, "Topics": null}}} \ No newline at end of file diff --git a/jupyter_lab/pipeline.ipynb b/jupyter_lab/pipeline.ipynb index b7fbaeb..403c2d5 100644 --- a/jupyter_lab/pipeline.ipynb +++ b/jupyter_lab/pipeline.ipynb @@ -10,9 +10,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ti%3A%22large%20language%20model%22%20AND%20ti%3ABenchmark\n", + "\u001b[32mTotal number of article is 114\u001b[0m\n", + "\u001b[32m Round (1) : Get another 10 record (Total 10 record)\u001b[0m\n", + "\u001b[32m Round (2) : Get another 10 record (Total 20 record)\u001b[0m\n", + "\u001b[32m Round (3) : Get another 10 record (Total 30 record)\u001b[0m\n", + "\u001b[32m Round (4) : Get another 10 record (Total 40 record)\u001b[0m\n", + "\u001b[32m Round (5) : Get another 10 record (Total 50 record)\u001b[0m\n", + "\u001b[32m Round (6) : Get another 10 record (Total 60 record)\u001b[0m\n", + "\u001b[32m Round (7) : Get another 10 record (Total 70 record)\u001b[0m\n", + "\u001b[32m Round (8) : Get another 10 record (Total 80 record)\u001b[0m\n", + "\u001b[32m Round (9) : Get another 10 record (Total 90 record)\u001b[0m\n", + "\u001b[32m Round (10) : Get another 10 record (Total 100 record)\u001b[0m\n", + "\u001b[32m Round (11) : Get another 10 record (Total 110 record)\u001b[0m\n", + "\n", + "\u001b[31mError in parsing arxiv response. 
Entry missing.\u001b[0m\n", + "\n", + "\u001b[31mError Line 23\u001b[0m\n", + "\u001b[31mError 'entry'\u001b[0m\n", + "\u001b[32m Round (12) : Get another 10 record (Total 120 record)\u001b[0m\n" + ] + } + ], "source": [ "import urllib.parse\n", "from triplea.service.repository.state.initial_arxiv import get_article_list_from_arxiv_all_store_to_arepo\n", @@ -27,7 +53,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -59,7 +85,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -67,60 +93,68 @@ "output_type": "stream", "text": [ "\u001b[32mNumber of article in article repository is 115\u001b[0m\n", - "[{'State': -2, 'n': 0}, {'State': -1, 'n': 115}, {'State': 0, 'n': 0}, {'State': 1, 'n': 0}, {'State': 2, 'n': 0}, {'State': 3, 'n': 0}, {'State': 4, 'n': 0}]\n", - "\u001b[32m115 article(s) in state -1.\u001b[0m\n" + "\u001b[32m115 article(s) in state 3.\u001b[0m\n" ] } ], "source": [ - "from triplea.service.click_logger import logger\n", - "from triplea.service.repository import persist\n", "\n", + "import triplea.service.repository.persist as PERSIST\n", + "import triplea.service.repository.pipeline_core as PIPELINE\n", "\n", - "logger.INFO(\n", - " \"Number of article in article repository is \"\n", - " + str(persist.get_all_article_count())\n", - ")\n", "\n", - "data = persist.get_article_group_by_state()\n", - "for i in range(-3, 7):\n", - " for s in data:\n", - " if s[\"State\"] == i:\n", - " w = 1\n", - " n = s[\"n\"]\n", - " if n != 0:\n", - " logger.INFO(f\"{n} article(s) in state {i}.\")" + "PERSIST.print_article_info_from_repo() " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Moving Forward\n", + "## Moving forward in the core pipeline\n", "We move from state `0` to state `3`.\n", "The best approach is to first finalize the state of all the articles in the `core state`.\n", - "\n", + "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Define dependency:" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "from triplea.service.repository.pipeline_core import move_state_forward" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Moving from `0` to `1`" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "\u001b[32m1 Article(s) is in state 0\u001b[0m\n", - "Article 37567487 with state 0 forward to 1\n" + "\u001b[32m0 Article(s) is in state 0\u001b[0m\n" ] } ], "source": [ - "from triplea.service.repository.pipeline_core import move_state_forward\n", "\n", - "move_state_forward(0)" + "PIPELINE.move_state_forward(0)" ] }, { @@ -130,13 +164,158 @@ "### Moving from `1` to `2`" ] }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m0 Article(s) is in state 1\u001b[0m\n" + ] + } + ], + "source": [ + "PIPELINE.move_state_forward(1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Moving from `2` to `3`" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m0 Article(s) is in state 2\u001b[0m\n" + ] + } + ], + "source": [ + "PIPELINE.move_state_forward(2)" + ] + }, + { + "cell_type": "markdown",
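The three forward moves above can also be driven in one loop. A minimal sketch, assuming only the `PERSIST`/`PIPELINE` aliases imported in this notebook; the state range is the core pipeline's `0` to `3` walk described above:

```python
# Minimal sketch: drive every article through the core pipeline,
# state 0 -> 1 -> 2 -> 3, then report the resulting state distribution.
import triplea.service.repository.persist as PERSIST
import triplea.service.repository.pipeline_core as PIPELINE

for state in range(0, 3):  # each call moves articles one state forward
    PIPELINE.move_state_forward(state)

PERSIST.print_article_info_from_repo()  # e.g. "115 article(s) in state 3."
```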
+ "metadata": {}, + "source": [ + "## Check article object info" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m\u001b[0m\n", + "\u001b[32mTitle : Evaluation High-Quality of Information from ChatGPT (Artificial Intelligence-Large Language Model) Artificial Intelligence on Shoulder Stabilization Surgery.\u001b[0m\n", + "\u001b[32mJournal : Arthroscopy : the journal of arthroscopic & related surgery : official publication of the Arthroscopy Association of North America and the International Arthroscopy Association\u001b[0m\n", + "\u001b[32mDOI : 10.1016/j.arthro.2023.07.048\u001b[0m\n", + "\u001b[32mPMID : 37567487\u001b[0m\n", + "\u001b[32mPMC : None\u001b[0m\n", + "\u001b[32mState : 3\u001b[0m\n", + "\u001b[32mAuthors : Eoghan T Hurley, Bryan S Crook, Samuel G Lorentz, Richard M Danilkowicz, Brian C Lau, Dean C Taylor, Jonathan F Dickens, Oke Anakwenze, Christopher S Klifto, \u001b[0m\n", + "\u001b[32mKeywords: \u001b[0m\n" + ] + } + ], + "source": [ + "PERSIST.print_article_short_description(\"37567487\",\"pmid\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Moving forward in custom pipeline\n", + "These stages in custom pipleline do not have a specific prerequisite and post-requirement relationship, and when the core pipeline is provided and it has reached state 3, each of the actions of this pipeline can be done independently. This pipeline includes the following:\n", + "\n", + "|Action|Tag Name|Description|\n", + "|------|--------|-----------|\n", + "|Triple extraction from article abstract|FlagExtractKG||\n", + "|Topic extraction from article abstract|FlagExtractTopic||\n", + "|Convert Affiliation text to structural data|FlagAffiliationMining|This is simple way for parse Affiliation text |\n", + "|Convert Affiliation text to structural data|FlagAffiliationMining_Titipata|use [Titipat Achakulvisut Repo](https://github.com/titipata/affiliation_parser) for parsing Affiliation text|\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Extract Topic\n", + "In this method, we convert the article summary and the article title into a list of topics using topic extraction algorithms and save it. Previously, this method was in the program, but in the new versions, it is considered as an external service. The following variables are used to configure the service:\n", + "\n", + "- AAA_TOPIC_EXTRACT_ENDPOINT\n", + "- AAA_CLIENT_AGENT" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import triplea.service.repository.pipeline_flag as cPIPELINE\n", + "cPIPELINE.go_extract_topic()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Affiliation Mining" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cPIPELINE.go_affiliation_mining(method=\"Titipata\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Extract Triple\n", + "Extract Triple refers to the task of extracting subject-predicate-object triples from natural language text. Specifically:\n", + "\n", + "- A triple consists of a subject, a predicate (typically a verb), and an object. 
For example:\n", + "\n", + "[John] (subject) [eats] (predicate) [apples] (object)\n", + "\n", + "- Extracting triples involves analyzing sentences in text to identify these key elements and convert them into a structured format.\n", + "\n", + "- This allows capturing semantic relationships in text and representing them in a more machine-readable way for tasks like knowledge base construction, question answering, summarization, etc.\n", + "\n", + "- There are various methods for triple extraction, ranging from rule-based systems to statistical and neural network models. These models identify the syntactic structure of sentences to detect appropriate noun phrases that can act as entities and predicates.\n", + "\n", + "In summary, triple extraction automatically transforms unstructured text into structured triple representations that capture the semantics and relationships described in the text. It serves as a key information extraction component for many downstream artificial intelligence applications." + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "move_state_forward(1)" + "cPIPELINE.go_extract_triple()" ] } ], @@ -156,7 +335,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.5" + "version": "3.11.3" } }, "nbformat": 4, diff --git a/pyproject.toml b/pyproject.toml index 23b8b8a..ee911eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "triplea" -version = "0.0.4" +version = "0.0.5" license = "Apache-2.0" description = "Article Analysis Assistant" authors = ["Ehsan Bitaraf ", "Maryam Jafarpour "] diff --git a/triplea/cli/arepo.py b/triplea/cli/arepo.py index dfd83e6..b0bbd88 100644 --- a/triplea/cli/arepo.py +++ b/triplea/cli/arepo.py @@ -112,7 +112,7 @@ def arepo(command, pmid, output): logger.ERROR("Not found.") sys.exit(1) return - + output_data = a a_title = a["Title"] a_journal = a["Journal"] diff --git a/triplea/client/triple_extraction.py b/triplea/client/triple_extraction.py index 31ba983..c49da65 100644 --- a/triplea/client/triple_extraction.py +++ b/triplea/client/triple_extraction.py @@ -3,9 +3,10 @@ import json from triplea.service.click_logger import logger +session = requests.Session() def extract_triple(text: str) -> list: - URL = SETTINGS.AAA_TOPIC_EXTRACT_ENDPOINT + URL = SETTINGS.AAA_TOPIC_EXTRACT_ENDPOINT + "/triple/" # data to be sent to api data = { @@ -28,7 +29,7 @@ # sending get request and saving the response as response object try: - r = requests.post( + r = session.post( url=URL, data=json.dumps(data), headers=headers, proxies=proxy_servers ) except Exception: diff --git a/triplea/db/mongodb.py b/triplea/db/mongodb.py index b8a9dc6..6b25b6a 100644 --- a/triplea/db/mongodb.py +++ b/triplea/db/mongodb.py @@ -6,7 +6,7 @@ from triplea.config.settings import SETTINGS from triplea.schemas.article import Article from triplea.schemas.node import Edge, Node - +from bson import json_util class DB_MongoDB(DataBase): _connection_url = SETTINGS.AAA_MONGODB_CONNECTION_URL @@ -81,6 +81,24 @@ def get_article_pmid_list_by_cstate(self, state: int, tag_field: str): return [] else: return new_la + + def get_article_id_list_by_cstate(self, state: int, tag_field: str): + if state is None or state == 0: + myquery = {"$or": [{tag_field: None}, {tag_field: 0}]} + else: + myquery = {tag_field: state} + + cursor = self.col_article.find(myquery, projection={"PMID": 
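To make the `[John] eats [apples]` example concrete, here is a toy subject-predicate-object extractor for three-word declarative sentences. It is purely illustrative: this project delegates the real work to an external service through `extract_triple` in `triplea/client/triple_extraction.py`, shown in the diff below.

```python
# Illustrative only: a naive rule-based subject-predicate-object extractor.
# The real pipeline calls an external extraction service; this sketch just
# shows the shape of the triples that downstream code stores per article.
import re


def naive_triples(text: str) -> list:
    triples = []
    for sentence in re.split(r"[.!?]", text):
        words = sentence.strip().split()
        if len(words) == 3:  # e.g. "John eats apples"
            subject, predicate, obj = words
            triples.append(
                {"subject": subject, "predicate": predicate, "object": obj}
            )
    return triples


print(naive_triples("John eats apples."))
# [{'subject': 'John', 'predicate': 'eats', 'object': 'apples'}]
```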
"$PMID", "_id": 1}) + + la = list(cursor) + new_la = [] + for c in la: + new_la.append(c["_id"]) + + if len(new_la) == 0: + return [] + else: + return new_la def get_all_article_pmid_list(self): myquery = {} @@ -95,6 +113,20 @@ def get_all_article_pmid_list(self): return [] else: return new_la + + def get_all_article_id_list(self): + myquery = {} + cursor = self.col_article.find(myquery, projection={"PMID": "$PMID", "_id": 1}) + + la = list(cursor) + new_la = [] + for c in la: + new_la.append(c["_id"]) + + if len(new_la) == 0: + return [] + else: + return new_la def get_count_article_by_state(self, state: int): myquery = {"State": state} @@ -126,21 +158,30 @@ def get_article_by_id(self, id: str): la = list(cursor) return la[0] - def update_article_by_pmid(self, article: Article, pmid: str): - article_json = json.loads( - json.dumps(article, default=lambda o: o.__dict__, sort_keys=True, indent=4) - ) - myquery = {"PMID": pmid} - r = self.col_article.replace_one(myquery, article_json) - return r.raw_result - - def update_article_by_id(self, article: Article, id: str): - article_json = json.loads( - json.dumps(article, - default=lambda o: o.__dict__, - sort_keys=True, - indent=4) - ) +## Temporary + # def update_article_by_pmid(self, article: Article, pmid: str): + # article_json = json.loads( + # json.dumps(article, + # default=lambda o: o.__dict__, + # sort_keys=True, + # indent=4) + # ) + # myquery = {"PMID": pmid} + # r = self.col_article.replace_one(myquery, article_json) + # return r.raw_result + + def update_article_by_id(self, article: Article, id): + # article_json = json.loads( + # # json.dumps(article, + # # default=lambda o: o.__dict__, + # # sort_keys=True, + # # indent=4) + + # # json.dumps(article, default=json_util.default) + + # ) + # TODO Last way of serialization + article_json = article.dict() myquery = {"_id": id} r = self.col_article.replace_one(myquery, article_json) return r.raw_result @@ -243,9 +284,10 @@ def get_all_edges(self): # region Triple def add_new_triple(self, edge: dict) -> int: - triple_json = json.loads( - json.dumps(edge, default=lambda o: o.__dict__, sort_keys=True, indent=4) - ) + # triple_json = json.loads( + # json.dumps(edge, default=lambda o: o.__dict__, sort_keys=True, indent=4) + # ) + triple_json = edge result = self.col_triple.insert_one(triple_json) return result.inserted_id diff --git a/triplea/db/tinydb.py b/triplea/db/tinydb.py index f1308b0..9b412f8 100644 --- a/triplea/db/tinydb.py +++ b/triplea/db/tinydb.py @@ -65,6 +65,21 @@ def get_article_pmid_list_by_cstate(self, state: int, tag_field: str): l_pmid = [a.get("PMID") for a in self.db.search(q[tag_field] == state)] return l_pmid + + def get_article_id_list_by_cstate(self, state: int, tag_field: str): + q = Query() + if state is None or state == 0: + query = ( + (Query()[tag_field] == 0) + | (Query()[tag_field] is None) + | (~Query()[tag_field].exists()) + ) + l_pmid = [a.get("id") for a in self.db.search(query)] + else: + l_pmid = [a.get("id") for a in self.db.search(q[tag_field] == state)] + return l_pmid + + def get_all_article_pmid_list(self): l_all = self.db.all() l_pmid = [] @@ -72,6 +87,13 @@ def get_all_article_pmid_list(self): l_pmid.append(i["PMID"]) return l_pmid + def get_all_article_id_list(self): + l_all = self.db.all() + l_pmid = [] + for i in l_all: + l_pmid.append(i["id"]) + return l_pmid + def get_count_article_by_state(self, state: int): q = Query() l_pmid = self.db.search(q.State == state) diff --git a/triplea/service/repository/export/triplea_format.py 
b/triplea/service/repository/export/triplea_format.py index ff2af41..d4b5232 100644 --- a/triplea/service/repository/export/triplea_format.py +++ b/triplea/service/repository/export/triplea_format.py @@ -191,12 +191,12 @@ def export_triplea_csv(proccess_bar=False, limit_sample=0) -> str: # noqa: C901 def export_triplea_csvs_in_relational_mode_save_file( # noqa: C901 output_file: str, proccess_bar=True, limit_sample=0 ): # noqa: C901 - l_pmid = persist.get_all_article_pmid_list() - logger.DEBUG(f"{str(len(l_pmid))} Article(s) Selected.") + l_id = persist.get_all_article_id_list() + logger.DEBUG(f"{str(len(l_id))} Article(s) Selected.") - total_article_in_current_state = len(l_pmid) + total_article_in_current_state = len(l_id) - bar = click.progressbar(length=len(l_pmid), show_pos=True, show_percent=True) + bar = click.progressbar(length=len(l_id), show_pos=True, show_percent=True) max_refresh_point = 500 refresh_point = 0 csv = "" @@ -248,7 +248,7 @@ def export_triplea_csvs_in_relational_mode_save_file( # noqa: C901 f_keywords = open(keywords_file, "a", encoding="utf-8") f_topics = open(topics_file, "a", encoding="utf-8") - for id in l_pmid: + for id in l_id: try: n = n + 1 @@ -270,7 +270,7 @@ def export_triplea_csvs_in_relational_mode_save_file( # noqa: C901 if n > limit_sample: break - a = persist.get_article_by_pmid(id) + a = persist.get_article_by_id(id) # a = persist.get_article_by_pmid('18194356') # CRITICAL try: diff --git a/triplea/service/repository/persist.py b/triplea/service/repository/persist.py index 03cc276..d62e5e0 100644 --- a/triplea/service/repository/persist.py +++ b/triplea/service/repository/persist.py @@ -52,11 +52,17 @@ def get_article_pmid_list_by_cstate(state: int, tag_field: str): """ return db.get_article_pmid_list_by_cstate(state, tag_field) +def get_article_id_list_by_cstate(state: int, tag_field: str): + return db.get_article_id_list_by_cstate(state, tag_field) + def get_all_article_pmid_list(): return db.get_all_article_pmid_list() +def get_all_article_id_list(): + return db.get_all_article_id_list() + def get_count_article_by_state(state: int) -> int: """ This function returns the number of articles in the database @@ -82,7 +88,7 @@ def get_article_by_pmid(pmid: str): return db.get_article_by_pmid(pmid) -def get_article_by_id(id: str): +def get_article_by_id(id): return db.get_article_by_id(id) def update_article_by_pmid(article, pmid: str): @@ -181,13 +187,69 @@ def get_article_group_by_state(): return db.get_article_group_by_state() + + # region Extra Article Method def change_flag_extract_topic(current_value, set_value): return db.change_flag_extract_topic(current_value, set_value) - +def print_article_info_from_repo(): + logger.INFO( + "Number of article in article repository is " + + str(db.get_all_article_count()) + ) + + data = db.get_article_group_by_state() + for i in range(-3, 7): + for s in data: + if s["State"] == i: + w = 1 + n = s["n"] + if n != 0: + logger.INFO(f"{n} article(s) in state {i}.") + + + +def print_article_short_description(id:str, id_type:str): + id_type = id_type.lower() + if id_type == "pmid": + a = db.get_article_by_pmid(id) + elif id_type == "arxiv": + pass + else: + raise NotImplementedError + + if a is not None: + a_title = a["Title"] + a_journal = a["Journal"] + a_doi = a["DOI"] + a_pmid = a["PMID"] + a_pmc = a["PMC"] + a_state = a["State"] + + logger.INFO("") + logger.INFO(f"Title : {a_title}") + logger.INFO(f"Journal : {a_journal}") + logger.INFO(f"DOI : {a_doi}") + logger.INFO(f"PMID : {a_pmid}") + logger.INFO(f"PMC : 
{a_pmc}") + logger.INFO(f"State : {a_state}") + + if "Authors" in a: + if a["Authors"] is not None: + authors = "" + for author in a["Authors"]: + authors = authors + author["FullName"] + ", " + logger.INFO(f"Authors : {authors}") + + if "Keywords" in a: + if a["Keywords"] is not None: + keywords = "" + for k in a["Keywords"]: + keywords = keywords + k["Text"] + ", " + logger.INFO(f"Keywords: {keywords}") # endregion diff --git a/triplea/service/repository/pipeline_core.py b/triplea/service/repository/pipeline_core.py index df25764..68399db 100644 --- a/triplea/service/repository/pipeline_core.py +++ b/triplea/service/repository/pipeline_core.py @@ -181,6 +181,13 @@ def move_state_forward( updated_article = state_manager.get_citation(updated_article) # persist.update_article_by_pmid(updated_article, # updated_article.PMID) + elif current_state == -2: # Net state: Get Citation + updated_article = state_manager.get_citation(updated_article) + + else: + print() + logger.ERROR(f"Error undefine current state.") + persist.update_article_by_id(updated_article, id) @@ -189,8 +196,8 @@ def move_state_forward( if current_state == 1: updated_article = Article(**a.copy()) updated_article.State = -1 - persist.update_article_by_pmid(updated_article, - updated_article.PMID) + persist.update_article_by_id(updated_article, + id) persist.refresh() exc_type, exc_value, exc_tb = sys.exc_info() print() @@ -208,8 +215,8 @@ def move_state_forward( elif current_state == 2: updated_article = Article(**a.copy()) updated_article.State = -2 - persist.update_article_by_pmid(updated_article, - updated_article.PMID) + persist.update_article_by_id(updated_article, + id) persist.refresh() exc_type, exc_value, exc_tb = sys.exc_info() print() diff --git a/triplea/service/repository/pipeline_flag.py b/triplea/service/repository/pipeline_flag.py index 207228a..b12ace7 100644 --- a/triplea/service/repository/pipeline_flag.py +++ b/triplea/service/repository/pipeline_flag.py @@ -2,83 +2,84 @@ import time import traceback import click +from triplea.config.settings import SETTINGS from triplea.schemas.article import Article import triplea.service.repository.persist as persist import triplea.service.repository.state as state_manager from triplea.service.click_logger import logger +from triplea.utils.general import print_error + +def go_extract_triple(proccess_bar=True): + max_refresh_point = SETTINGS.AAA_CLI_ALERT_POINT + l_id = persist.get_article_id_list_by_cstate(0, "FlagExtractKG") + total_article_in_current_state = len(l_id) + n = 0 + logger.DEBUG(str(len(l_id)) + " Article(s) is in FlagExtractKG " + str(0)) -def go_extract_triple(): - online_bar = True - max_refresh_point = 500 - l_pmid = persist.get_article_pmid_list_by_cstate(0, "FlagExtractKG") - total_article_in_current_state = len(l_pmid) - number_of_article_move_forward = 0 - logger.DEBUG(str(len(l_pmid)) + " Article(s) is in FlagExtractKG " + str(0)) - - bar = click.progressbar(length=len(l_pmid), show_pos=True, show_percent=True) + bar = click.progressbar(length=len(l_id), show_pos=True, show_percent=True) refresh_point = 0 - for id in l_pmid: + for id in l_id: try: - number_of_article_move_forward = number_of_article_move_forward + 1 + n = n + 1 current_state = None if refresh_point == max_refresh_point: refresh_point = 0 persist.refresh() - if online_bar: + if proccess_bar: print() logger.INFO( - f"There are {str(total_article_in_current_state - number_of_article_move_forward)} article(s) left ", # noqa: E501 + f"There are {str(total_article_in_current_state - n)} 
article(s) left ", # noqa: E501 forecolore="yellow", ) - if online_bar is False: - bar.label = f"There are {str(total_article_in_current_state - number_of_article_move_forward)} article(s) left " # noqa: E501 + if proccess_bar is False: + bar.label = f"There are {str(total_article_in_current_state - n)} article(s) left " # noqa: E501 bar.update(max_refresh_point) else: refresh_point = refresh_point + 1 - a = persist.get_article_by_pmid(id) + a = persist.get_article_by_id(id) try: updated_article = Article(**a.copy()) except Exception: print() - print(logger.ERROR(f"Error in parsing article. PMID = {id}")) + print(logger.ERROR(f"Error in parsing article. ID = {id}")) raise Exception("Article Not Parsed.") try: current_state = updated_article.FlagExtractKG except Exception: current_state = 0 - if online_bar: + if proccess_bar: bar.label = ( "Article " - + updated_article.PMID + + str(id) + " Extract Knowledge Triple From Abstract" ) bar.update(1) if current_state is None: updated_article = state_manager.extract_triple_abstract_save( - updated_article + updated_article, id ) - persist.update_article_by_pmid(updated_article, - updated_article.PMID) + persist.update_article_by_id(updated_article, + id) elif current_state == -1: updated_article = state_manager.extract_triple_abstract_save( - updated_article + updated_article, id ) - persist.update_article_by_pmid(updated_article, - updated_article.PMID) + persist.update_article_by_id(updated_article, + id) elif current_state == 0: updated_article = state_manager.extract_triple_abstract_save( - updated_article + updated_article, id ) - persist.update_article_by_pmid(updated_article, - updated_article.PMID) + persist.update_article_by_id(updated_article, + id) elif current_state == 1: pass @@ -90,40 +91,31 @@ def go_extract_triple(): if current_state == 0 or current_state is None: updated_article = Article(**a.copy()) updated_article.FlagExtractKG = 0 - persist.update_article_by_pmid(updated_article, - updated_article.PMID) + persist.update_article_by_id(updated_article, + id) persist.refresh() - exc_type, exc_value, exc_tb = sys.exc_info() - print() - logger.ERROR(f"Error {exc_type}") - logger.ERROR(f"Error {exc_value}") + print_error() else: persist.refresh() - exc_type, exc_value, exc_tb = sys.exc_info() - print() - print(exc_tb.tb_lineno) - print() - traceback.print_tb(exc_tb) - logger.ERROR(f"Error {exc_type}") - logger.ERROR(f"Error {exc_value}") - logger.ERROR(f"Error {exc_tb}") + print_error() + persist.refresh() def go_extract_topic(proccess_bar=True): - max_refresh_point = 500 - l_pmid = persist.get_article_pmid_list_by_cstate(0, "FlagExtractTopic") - total_article_in_current_state = len(l_pmid) + max_refresh_point = SETTINGS.AAA_CLI_ALERT_POINT + l_id = persist.get_article_id_list_by_cstate(0, "FlagExtractTopic") + total_article_in_current_state = len(l_id) n = 0 - logger.DEBUG(str(len(l_pmid)) + " Article(s) is in FlagExtractTopic " + str(0)) + logger.DEBUG(str(len(l_id)) + " Article(s) is in FlagExtractTopic " + str(0)) if proccess_bar: - bar = click.progressbar(length=len(l_pmid), show_pos=True, show_percent=True) + bar = click.progressbar(length=len(l_id), show_pos=True, show_percent=True) refresh_point = 0 - for id in l_pmid: + for id in l_id: try: n = n + 1 current_state = None @@ -143,12 +135,12 @@ def go_extract_topic(proccess_bar=True): else: refresh_point = refresh_point + 1 - a = persist.get_article_by_pmid(id) + a = persist.get_article_by_id(id) try: updated_article = Article(**a.copy()) except Exception: print() - 
print(logger.ERROR(f"Error in parsing article. PMID = {id}")) + print(logger.ERROR(f"Error in parsing article. ID = {id}")) raise Exception("Article Not Parsed.") try: current_state = updated_article.FlagExtractTopic # ------------ @@ -157,69 +149,59 @@ def go_extract_topic(proccess_bar=True): if proccess_bar: bar.label = ( - "Article " + updated_article.PMID + " , topic were extracted." + f"""Article {id}, topic were extracted.""" ) bar.update(1) if current_state is None: - updated_article = state_manager.extract_topic_abstract(updated_article) - persist.update_article_by_pmid(updated_article, - updated_article.PMID) - + updated_article = state_manager.extract_topic_abstract( + updated_article) elif current_state == -1: - updated_article = state_manager.extract_topic_abstract(updated_article) - persist.update_article_by_pmid(updated_article, - updated_article.PMID) - + updated_article = state_manager.extract_topic_abstract( + updated_article) elif current_state == 0: - updated_article = state_manager.extract_topic_abstract(updated_article) - persist.update_article_by_pmid(updated_article, - updated_article.PMID) - + updated_article = state_manager.extract_topic_abstract( + updated_article) elif current_state == 1: pass else: raise NotImplementedError + + persist.update_article_by_id(updated_article, id) except Exception: if current_state == 0 or current_state is None: updated_article = Article(**a.copy()) updated_article.FlagExtractTopic = -1 - persist.update_article_by_pmid(updated_article, - updated_article.PMID) + persist.update_article_by_id(updated_article, + id) persist.refresh() - exc_type, exc_value, exc_tb = sys.exc_info() - print() - logger.ERROR(f"Error {exc_type}") - logger.ERROR(f"Error {exc_value}") + print_error() else: persist.refresh() - exc_type, exc_value, exc_tb = sys.exc_info() - print() - print(exc_tb.tb_lineno) - logger.ERROR(f"Error {exc_type}") - logger.ERROR(f"Error {exc_value}") + print_error() persist.refresh() -def go_affiliation_mining(method: str = "Simple",proccess_bar=True): - online_bar = True - max_refresh_point = 500 - l_pmid = persist.get_article_pmid_list_by_cstate(0, "FlagAffiliationMining") - total_article_in_current_state = len(l_pmid) - number_of_article_move_forward = 0 - logger.DEBUG(f"""{str(len(l_pmid))} Article(s) is - in FlagAffiliationMining {str(0)}""") +def go_affiliation_mining(method: str = "Simple", proccess_bar=True): + max_refresh_point = SETTINGS.AAA_CLI_ALERT_POINT + l_id = persist.get_article_id_list_by_cstate(0, "FlagAffiliationMining") + total_article_in_current_state = len(l_id) + n = 0 + logger.DEBUG(f"""{str( + len(l_id) + )} Article(s) is in FlagAffiliationMining {str(0)}""") if proccess_bar: - bar = click.progressbar(length=len(l_pmid), show_pos=True, show_percent=True) + bar = click.progressbar(length=len(l_id), + show_pos=True, + show_percent=True) refresh_point = 0 - elapsed = 0 - for id in l_pmid: - start_time = time.time() + for id in l_id: + try: n = n + 1 current_state = None @@ -240,12 +222,12 @@ def go_affiliation_mining(method: str = "Simple",proccess_bar=True): else: refresh_point = refresh_point + 1 - a = persist.get_article_by_pmid(id) + a = persist.get_article_by_id(id) try: updated_article = Article(**a.copy()) except Exception: print() - print(logger.ERROR(f"Error in parsing article. PMID = {id}")) + print(logger.ERROR(f"Error in parsing article. 
ID = {id}")) raise Exception("Article Not Parsed.") try: current_state = updated_article.FlagAffiliationMining @@ -254,7 +236,7 @@ def go_affiliation_mining(method: str = "Simple",proccess_bar=True): if proccess_bar: bar.label = ( - f"Article {updated_article.PMID} affiliation mining." + f"Article {id} affiliation mining." ) bar.update(1) @@ -263,17 +245,14 @@ def go_affiliation_mining(method: str = "Simple",proccess_bar=True): if current_state is None or current_state == -1 or current_state == 0: if method == "Simple": - updated_article = state_manager.affiliation_mining(updated_article) - persist.update_article_by_pmid( - updated_article, updated_article.PMID - ) + updated_article = state_manager.affiliation_mining( + updated_article) + persist.update_article_by_id(updated_article, id) elif method == "Titipata": updated_article = state_manager.affiliation_mining_titipata( updated_article ) - persist.update_article_by_pmid( - updated_article, updated_article.PMID - ) + persist.update_article_by_id(updated_article, id) elif current_state == 1: pass @@ -288,17 +267,9 @@ def go_affiliation_mining(method: str = "Simple",proccess_bar=True): persist.update_article_by_pmid(updated_article, updated_article.PMID) persist.refresh() - exc_type, exc_value, exc_tb = sys.exc_info() - print() - logger.ERROR(f"Error {exc_type}") - logger.ERROR(f"Error {exc_value}") + print_error() else: persist.refresh() - exc_type, exc_value, exc_tb = sys.exc_info() - print() - print(exc_tb.tb_lineno) - logger.ERROR(f"Error {exc_type}") - logger.ERROR(f"Error {exc_value}") - elapsed = time.time() - start_time + print_error() persist.refresh() diff --git a/triplea/service/repository/state/custom/extract_kg_abstract.py b/triplea/service/repository/state/custom/extract_kg_abstract.py index ba3b54b..7d89a5a 100644 --- a/triplea/service/repository/state/custom/extract_kg_abstract.py +++ b/triplea/service/repository/state/custom/extract_kg_abstract.py @@ -4,14 +4,14 @@ import triplea.service.repository.persist as persist -def extract_triple_abstract_save(article: Article): +def extract_triple_abstract_save(article: Article,article_id): article.FlagExtractKG = 1 if article.Abstract is not None: # triples_list = extract_triples(article.Abstract) # Expire Module + triples_list = [] triples_list = extract_triple(article.Abstract) - triples_list = [] # CRITICAL must be API for t in triples_list: - t["PMID"] = article.PMID + t["Article_ID"] = article_id persist.create_triple(t) return article diff --git a/triplea/service/repository/state/expand_details.py b/triplea/service/repository/state/expand_details.py index d49852e..74dfaac 100644 --- a/triplea/service/repository/state/expand_details.py +++ b/triplea/service/repository/state/expand_details.py @@ -9,7 +9,10 @@ def _expand_details_arxiv(article: Article) -> Article: - article.State = 1 + # previous state is 0 + article.State = 1 # next state + backward_state = 0 + # Archive is One Shot. 
There is no need for this step, # although it should be checked why it is at this step return article diff --git a/triplea/service/repository/state/get_citation.py b/triplea/service/repository/state/get_citation.py index 71afc5e..d13ea6f 100644 --- a/triplea/service/repository/state/get_citation.py +++ b/triplea/service/repository/state/get_citation.py @@ -17,7 +17,9 @@ def _get_citation_pubmed(article: Article): :type article: Article :return: Article with list of CitedBy """ - article.State = 3 + # previous state is 2 + article.State = 3 # next state + backward_state = -2 pmid = article.PMID if pmid is not None: if article.CiteCrawlerDeep is None: @@ -26,7 +28,7 @@ def _get_citation_pubmed(article: Article): try: lc = get_cited_article_from_pubmed(pmid) except Exception: - article.State = -3 + article.State = backward_state exc_type, exc_value, exc_tb = sys.exc_info() logger.ERROR(f"Error {exc_type} Value : {exc_value}") logger.ERROR(f"Error {exc_tb}") @@ -63,7 +65,9 @@ def _get_citation_pubmed(article: Article): def _get_citation_arxiv(article: Article): - article.State = 3 + # previous state is 2 + article.State = 3 # next state + backward_state = -2 # I still haven't found an operational idea to get # citations of arxiv articles, maybe through google. return article diff --git a/triplea/service/repository/state/initial_arxiv.py b/triplea/service/repository/state/initial_arxiv.py index d870218..0a68dce 100644 --- a/triplea/service/repository/state/initial_arxiv.py +++ b/triplea/service/repository/state/initial_arxiv.py @@ -12,6 +12,13 @@ def parse_arxiv_list(data:dict): article_list = [] try: + if "feed" not in data: + print() + logger.ERROR(f"Error in parsing arxiv response. Feed missing.") + if "entry" not in data["feed"]: + print() + logger.ERROR(f"Error in parsing arxiv response. 
Entry missing.") + # Parse arxiv list into Article object list with State 1 for a in data["feed"]["entry"]: article = Article() diff --git a/triplea/service/repository/state/parsing_details.py b/triplea/service/repository/state/parsing_details.py index 7f58423..7ac9ee4 100644 --- a/triplea/service/repository/state/parsing_details.py +++ b/triplea/service/repository/state/parsing_details.py @@ -1,8 +1,7 @@ from triplea.schemas.article import Article, SourceBankType -from triplea.service.repository.state import (parsing_details_arxiv, - parsing_details_pubmed) - +from triplea.service.repository.state.parsing_details_arxiv import parsing_details_arxiv +from triplea.service.repository.state.parsing_details_pubmed import parsing_details_pubmed def parsing_details(article: Article) -> Article: # this is dispatcher function diff --git a/triplea/service/repository/state/parsing_details_arxiv.py b/triplea/service/repository/state/parsing_details_arxiv.py index b3d8db8..230115b 100644 --- a/triplea/service/repository/state/parsing_details_arxiv.py +++ b/triplea/service/repository/state/parsing_details_arxiv.py @@ -15,7 +15,7 @@ def _parse_arxiv_author(single_author_dict:dict)-> Author: def parsing_details_arxiv(article: Article) -> Article: # current state may be 1 - article.State = 2 + article.State = 2 # next state backward_state = -1 data = article.OreginalArticle diff --git a/triplea/service/repository/state/parsing_details_pubmed.py b/triplea/service/repository/state/parsing_details_pubmed.py index 7e681cd..89d844b 100644 --- a/triplea/service/repository/state/parsing_details_pubmed.py +++ b/triplea/service/repository/state/parsing_details_pubmed.py @@ -121,7 +121,7 @@ def _convert_dict_to_reffrence(): def parsing_details_pubmed(article: Article) -> Article: # noqa: C901 # current state may be 1 - article.State = 2 + article.State = 2 # next state backward_state = -1 data = article.OreginalArticle diff --git a/triplea/the_private_backyard2.py b/triplea/the_private_backyard2.py index 09fe1d8..f7baaed 100644 --- a/triplea/the_private_backyard2.py +++ b/triplea/the_private_backyard2.py @@ -1,6 +1,7 @@ # flake8: noqa # noqa: F401 +from bson import BSON, ObjectId import click import time import sys @@ -32,38 +33,54 @@ from triplea.service.repository.state.custom.affiliation_mining import ( get_affiliation_structured, ) - +from triplea.service.repository.state.initial_arxiv import get_article_list_from_arxiv_all_store_to_arepo +from triplea.service.repository.state.initial import get_article_list_from_pubmed_all_store_to_arepo if __name__ == "__main__": pass - export_triplea_csvs_in_relational_mode_save_file( - "export.csv", proccess_bar=True, limit_sample=0 - ) - # move_state_forward(2) - # go_affiliation_mining() + # Pipeline Sample - # persist.change_flag_extract_topic(1,0) - # go_extract_topic(proccess_bar=True) + # # Step 1 - Get article from Arxiv + # arxiv_search_string = 'ti:"large language model" AND ti:Benchmark' + # get_article_list_from_arxiv_all_store_to_arepo(arxiv_search_string,0,10) - # aff_text = "Institute for Molecular Medicine Finland (FIMM), Helsinki Institute of Life Science (HiLIFE), University of Helsinki, Helsinki, Finland. aarno.palotie@helsinki.fi" - # aff_text = "Department of Neurology and Institute of Neurology, Huashan Hospital, State Key Laboratory of Medical Neurobiology and MOE Frontiers Center for Brain Science, Shanghai Medical College, Fudan University, National Center for Neurological Disorders, Shanghai, China. 
jintai_yu@fudan.edu.cn" - # aff_text = "Department of Ophthalmology, University of Washington, Seattle, Washington, USA" - # print(get_affiliation_structured(aff_text)) + # # Step 2 - Get article from Pubmed + # pubmed_search_string = '("large language model"[Title]) AND (Benchmark[Title/Abstract])' + # get_article_list_from_pubmed_all_store_to_arepo(pubmed_search_string) + - # from triplea.service.repository.state.custom.affiliation_mining import _is_country - # print(_is_country("Finland. aarno.palotie@helsinki.fi")) - # print(_is_country("Finland")) - # print(parse_affiliation(aff_text)) + # Step 3 - Get info + persist.print_article_info_from_repo() - # go_affiliation_mining(method='Titipata') + # # Step 4 - Moving from `0` to `1` + # move_state_forward(0) - # import triplea.service.repository.state as state_manager - # a = persist.get_article_by_pmid('31679581') - # updated_article = Article(**a.copy()) - # state_manager.affiliation_mining_titipata(updated_article) + # # Step 5 - Moving from `1` to `2` + # move_state_forward(1) + + # # Step 6 - Moving from `2` to `3` + # move_state_forward(2) + + # Get article info + # print() + # persist.print_article_short_description("37567487","pmid") - # import triplea.service.repository.state as state_manager - # a = persist.get_article_by_pmid('34358588') + + # a = persist.get_article_by_id(ObjectId('658f85228f23534d63358a19')) # updated_article = Article(**a.copy()) - # state_manager.parsing_details(updated_article) + # print(updated_article.State) + # print(updated_article.Title) + # print(updated_article.Published) + # print(type(updated_article.Published)) + + import triplea.service.repository.pipeline_flag as cPIPELINE + + # cPIPELINE.go_extract_topic() + + # cPIPELINE.go_affiliation_mining(method="Titipata") + + # cPIPELINE.go_extract_triple() + + export_triplea_csvs_in_relational_mode_save_file("export") + \ No newline at end of file diff --git a/triplea/the_private_backyard3.py b/triplea/the_private_backyard3.py index 080b044..ec3caf9 100644 --- a/triplea/the_private_backyard3.py +++ b/triplea/the_private_backyard3.py @@ -60,9 +60,11 @@ # text= urllib.parse.quote(text) # get_article_list_from_arxiv_all_store_to_arepo(text,20,10) - data = persist.get_article_group_by_state() - print(data) + # data = persist.get_article_group_by_state() + # print(data) # move_state_forward(-1) + move_state_forward(0) + diff --git a/triplea/utils/general.py b/triplea/utils/general.py index 5c9e8bf..b37478c 100644 --- a/triplea/utils/general.py +++ b/triplea/utils/general.py @@ -1,3 +1,6 @@ +import sys +from triplea.service.click_logger import logger + def safe_csv(text: str) -> str: if text is None: return "" @@ -9,3 +12,19 @@ def safe_csv(text: str) -> str: text = f'"{text[:-1]}"' return text + + +def print_error(): + exc_type, exc_value, exc_tb = sys.exc_info() + print() + logger.ERROR(f"Error {exc_type}") + logger.ERROR(f"Error {exc_value}") + + # exc_type, exc_value, exc_tb = sys.exc_info() + # print() + # print(exc_tb.tb_lineno) + # print() + # traceback.print_tb(exc_tb) + # logger.ERROR(f"Error {exc_type}") + # logger.ERROR(f"Error {exc_value}") + # logger.ERROR(f"Error {exc_tb}") \ No newline at end of file
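The new `print_error` helper centralizes the `sys.exc_info()` boilerplate that the pipeline previously repeated in every `except` block. A minimal usage sketch, assuming only what the diff above adds:

```python
# Sketch: unified error printing inside any except block.
from triplea.utils.general import print_error

try:
    1 / 0
except Exception:
    print_error()  # logs the exception type and value via the click logger
```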