done first batch
This commit is contained in:
@@ -12,7 +12,12 @@
|
||||
\abx@aux@segm{0}{0}{ivanov2024}
|
||||
\abx@aux@cite{0}{ivanov2024}
|
||||
\abx@aux@segm{0}{0}{ivanov2024}
|
||||
\abx@aux@read@bbl@mdfivesum{A739BAAE76801A6EB23A4AE3E4219B4A}
|
||||
\abx@aux@cite{0}{hoffmann2022trainingcomputeoptimallargelanguage}
|
||||
\abx@aux@segm{0}{0}{hoffmann2022trainingcomputeoptimallargelanguage}
|
||||
\abx@aux@cite{0}{hoffmann2022trainingcomputeoptimallargelanguage}
|
||||
\abx@aux@segm{0}{0}{hoffmann2022trainingcomputeoptimallargelanguage}
|
||||
\abx@aux@read@bbl@mdfivesum{117EB34AA21A650891B593351C04032D}
|
||||
\abx@aux@defaultrefcontext{0}{hoffmann2022trainingcomputeoptimallargelanguage}{nty/global//global/global}
|
||||
\abx@aux@defaultrefcontext{0}{ivanov2024}{nty/global//global/global}
|
||||
\abx@aux@defaultrefcontext{0}{app14020744}{nty/global//global/global}
|
||||
\abx@aux@defaultrefcontext{0}{vaswani2023attentionneed}{nty/global//global/global}
|
||||
|
||||
164
EEMLA.bbl
164
EEMLA.bbl
@@ -19,6 +19,170 @@
|
||||
|
||||
\refsection{0}
|
||||
\datalist[entry]{nty/global//global/global}
|
||||
\entry{hoffmann2022trainingcomputeoptimallargelanguage}{misc}{}
|
||||
\name{author}{22}{}{%
|
||||
{{un=0,uniquepart=base,hash=92d226c4ea870c0e9df2ceb8bf03d8ff}{%
|
||||
family={Hoffmann},
|
||||
familyi={H\bibinitperiod},
|
||||
given={Jordan},
|
||||
giveni={J\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=c28d74501d531d6672ccf4c28016c7ac}{%
|
||||
family={Borgeaud},
|
||||
familyi={B\bibinitperiod},
|
||||
given={Sebastian},
|
||||
giveni={S\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=e9a587c57b1dcb4834850800386d8aca}{%
|
||||
family={Mensch},
|
||||
familyi={M\bibinitperiod},
|
||||
given={Arthur},
|
||||
giveni={A\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=eee8c4b85d5c4e1f4bc70218d34ba69d}{%
|
||||
family={Buchatskaya},
|
||||
familyi={B\bibinitperiod},
|
||||
given={Elena},
|
||||
giveni={E\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=3d7a83ed6eb983ca17cec804631dc22e}{%
|
||||
family={Cai},
|
||||
familyi={C\bibinitperiod},
|
||||
given={Trevor},
|
||||
giveni={T\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=5cc07c10e9f1ef5fae367662553d982c}{%
|
||||
family={Rutherford},
|
||||
familyi={R\bibinitperiod},
|
||||
given={Eliza},
|
||||
giveni={E\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=47f81e65396e1d91b7ab70d59f69329b}{%
|
||||
family={Las\bibnamedelima Casas},
|
||||
familyi={L\bibinitperiod\bibinitdelim C\bibinitperiod},
|
||||
given={Diego},
|
||||
giveni={D\bibinitperiod},
|
||||
givenun=0,
|
||||
prefix={de},
|
||||
prefixi={d\bibinitperiod}}}%
|
||||
{{un=0,uniquepart=base,hash=68b917d11d355ed641ee448751fb3ba5}{%
|
||||
family={Hendricks},
|
||||
familyi={H\bibinitperiod},
|
||||
given={Lisa\bibnamedelima Anne},
|
||||
giveni={L\bibinitperiod\bibinitdelim A\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=7a7fdb4cb72d04e1eaa8a6a2ca0358dc}{%
|
||||
family={Welbl},
|
||||
familyi={W\bibinitperiod},
|
||||
given={Johannes},
|
||||
giveni={J\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=b06242c53973106520ab19f8e1fc3799}{%
|
||||
family={Clark},
|
||||
familyi={C\bibinitperiod},
|
||||
given={Aidan},
|
||||
giveni={A\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=0c1f2b1cf15f9e15216a9e652512bf6f}{%
|
||||
family={Hennigan},
|
||||
familyi={H\bibinitperiod},
|
||||
given={Tom},
|
||||
giveni={T\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=70b3914180795f8c87317cbd0599c942}{%
|
||||
family={Noland},
|
||||
familyi={N\bibinitperiod},
|
||||
given={Eric},
|
||||
giveni={E\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=da626c813aa81825001015989a23f850}{%
|
||||
family={Millican},
|
||||
familyi={M\bibinitperiod},
|
||||
given={Katie},
|
||||
giveni={K\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=54c5c572d4fdc5d822c240b59fcadad4}{%
|
||||
family={Driessche},
|
||||
familyi={D\bibinitperiod},
|
||||
given={George},
|
||||
giveni={G\bibinitperiod},
|
||||
givenun=0,
|
||||
prefix={van\bibnamedelima den},
|
||||
prefixi={v\bibinitperiod\bibinitdelim d\bibinitperiod}}}%
|
||||
{{un=0,uniquepart=base,hash=4da5c824138e58f9def8fd792b5ad94f}{%
|
||||
family={Damoc},
|
||||
familyi={D\bibinitperiod},
|
||||
given={Bogdan},
|
||||
giveni={B\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=8bc2c63cf1c4c6e9e4d317839dda4de2}{%
|
||||
family={Guy},
|
||||
familyi={G\bibinitperiod},
|
||||
given={Aurelia},
|
||||
giveni={A\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=643f92e8f89f2746a4c1aa077d225755}{%
|
||||
family={Osindero},
|
||||
familyi={O\bibinitperiod},
|
||||
given={Simon},
|
||||
giveni={S\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=9d16b7284df92c9adaee86c37ab992df}{%
|
||||
family={Simonyan},
|
||||
familyi={S\bibinitperiod},
|
||||
given={Karen},
|
||||
giveni={K\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=6aa92a937d4d30dd0b5ec0eecbad1bf1}{%
|
||||
family={Elsen},
|
||||
familyi={E\bibinitperiod},
|
||||
given={Erich},
|
||||
giveni={E\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=8dcdeb16ef3c68cf8396226668804fe0}{%
|
||||
family={Rae},
|
||||
familyi={R\bibinitperiod},
|
||||
given={Jack\bibnamedelima W.},
|
||||
giveni={J\bibinitperiod\bibinitdelim W\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=494b568c5dc85ba8f3f409635f9c5f25}{%
|
||||
family={Vinyals},
|
||||
familyi={V\bibinitperiod},
|
||||
given={Oriol},
|
||||
giveni={O\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=50d24de916599d306c5cb1a77156e4b9}{%
|
||||
family={Sifre},
|
||||
familyi={S\bibinitperiod},
|
||||
given={Laurent},
|
||||
giveni={L\bibinitperiod},
|
||||
givenun=0}}%
|
||||
}
|
||||
\strng{namehash}{8be79936575bb0d5b77c96da6d2a6d52}
|
||||
\strng{fullhash}{fb6d92421541c28738787855a47da527}
|
||||
\strng{bibnamehash}{8be79936575bb0d5b77c96da6d2a6d52}
|
||||
\strng{authorbibnamehash}{8be79936575bb0d5b77c96da6d2a6d52}
|
||||
\strng{authornamehash}{8be79936575bb0d5b77c96da6d2a6d52}
|
||||
\strng{authorfullhash}{fb6d92421541c28738787855a47da527}
|
||||
\field{sortinit}{H}
|
||||
\field{sortinithash}{23a3aa7c24e56cfa16945d55545109b5}
|
||||
\true{singletitle}
|
||||
\field{labelnamesource}{author}
|
||||
\field{labeltitlesource}{title}
|
||||
\field{eprintclass}{cs.CL}
|
||||
\field{eprinttype}{arXiv}
|
||||
\field{title}{Training Compute-Optimal Large Language Models}
|
||||
\field{year}{2022}
|
||||
\verb{eprint}
|
||||
\verb 2203.15556
|
||||
\endverb
|
||||
\verb{urlraw}
|
||||
\verb arxiv.org/abs/2203.15556
|
||||
\endverb
|
||||
\verb{url}
|
||||
\verb arxiv.org/abs/2203.15556
|
||||
\endverb
|
||||
\endentry
|
||||
\entry{ivanov2024}{misc}{}
|
||||
\name{author}{2}{}{%
|
||||
{{un=0,uniquepart=base,hash=1935b6f0043d4bac823842ff5d478faf}{%
|
||||
|
||||
@@ -2770,6 +2770,8 @@
|
||||
<bcf:citekey order="4" intorder="1">Wang2024</bcf:citekey>
|
||||
<bcf:citekey order="5" intorder="1">ivanov2024</bcf:citekey>
|
||||
<bcf:citekey order="6" intorder="1">ivanov2024</bcf:citekey>
|
||||
<bcf:citekey order="7" intorder="1">hoffmann2022trainingcomputeoptimallargelanguage</bcf:citekey>
|
||||
<bcf:citekey order="8" intorder="1">hoffmann2022trainingcomputeoptimallargelanguage</bcf:citekey>
|
||||
</bcf:section>
|
||||
<!-- SORTING TEMPLATES -->
|
||||
<bcf:sortingtemplate name="nty">
|
||||
|
||||
30
EEMLA.blg
30
EEMLA.blg
@@ -1,17 +1,17 @@
|
||||
[0] Config.pm:307> INFO - This is Biber 2.19
|
||||
[0] Config.pm:310> INFO - Logfile is 'EEMLA.blg'
|
||||
[41] biber:340> INFO - === Fri Feb 28, 2025, 02:56:54
|
||||
[50] Biber.pm:419> INFO - Reading 'EEMLA.bcf'
|
||||
[86] Biber.pm:979> INFO - Found 4 citekeys in bib section 0
|
||||
[95] Biber.pm:4419> INFO - Processing section 0
|
||||
[100] Biber.pm:4610> INFO - Looking for bibtex file 'references.bib' for section 0
|
||||
[100] bibtex.pm:1713> INFO - LaTeX decoding ...
|
||||
[103] bibtex.pm:1519> INFO - Found BibTeX data source 'references.bib'
|
||||
[137] UCollate.pm:68> INFO - Overriding locale 'en-US' defaults 'variable = shifted' with 'variable = non-ignorable'
|
||||
[137] UCollate.pm:68> INFO - Overriding locale 'en-US' defaults 'normalization = NFD' with 'normalization = prenormalized'
|
||||
[137] Biber.pm:4239> INFO - Sorting list 'nty/global//global/global' of type 'entry' with template 'nty' and locale 'en-US'
|
||||
[137] Biber.pm:4245> INFO - No sort tailoring available for locale 'en-US'
|
||||
[142] bbl.pm:660> INFO - Writing 'EEMLA.bbl' with encoding 'UTF-8'
|
||||
[143] bbl.pm:763> INFO - Output to EEMLA.bbl
|
||||
[143] Biber.pm:131> WARN - legacy month field 'Oct' in entry 'Wang2024' is not an integer - this will probably not sort properly.
|
||||
[143] Biber.pm:133> INFO - WARNINGS: 1
|
||||
[38] biber:340> INFO - === Fri Feb 28, 2025, 03:09:32
|
||||
[47] Biber.pm:419> INFO - Reading 'EEMLA.bcf'
|
||||
[81] Biber.pm:979> INFO - Found 5 citekeys in bib section 0
|
||||
[89] Biber.pm:4419> INFO - Processing section 0
|
||||
[95] Biber.pm:4610> INFO - Looking for bibtex file 'references.bib' for section 0
|
||||
[96] bibtex.pm:1713> INFO - LaTeX decoding ...
|
||||
[98] bibtex.pm:1519> INFO - Found BibTeX data source 'references.bib'
|
||||
[147] UCollate.pm:68> INFO - Overriding locale 'en-US' defaults 'variable = shifted' with 'variable = non-ignorable'
|
||||
[147] UCollate.pm:68> INFO - Overriding locale 'en-US' defaults 'normalization = NFD' with 'normalization = prenormalized'
|
||||
[147] Biber.pm:4239> INFO - Sorting list 'nty/global//global/global' of type 'entry' with template 'nty' and locale 'en-US'
|
||||
[147] Biber.pm:4245> INFO - No sort tailoring available for locale 'en-US'
|
||||
[151] bbl.pm:660> INFO - Writing 'EEMLA.bbl' with encoding 'UTF-8'
|
||||
[153] bbl.pm:763> INFO - Output to EEMLA.bbl
|
||||
[158] Biber.pm:131> WARN - legacy month field 'Oct' in entry 'Wang2024' is not an integer - this will probably not sort properly.
|
||||
[158] Biber.pm:133> INFO - WARNINGS: 1
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
# Fdb version 4
|
||||
["biber EEMLA"] 1740733013.7148 "EEMLA.bcf" "EEMLA.bbl" "EEMLA" 1740733218.14824 0
|
||||
"EEMLA.bcf" 1740733218.11349 125486 fd2cc3ea398de230c8b44dae74685005 "pdflatex"
|
||||
"references.bib" 1740732945.89131 2369 a24b87b373146d16ecacc3c3e385073c ""
|
||||
["biber EEMLA"] 1740733771.78129 "EEMLA.bcf" "EEMLA.bbl" "EEMLA" 1740734037.48048 0
|
||||
"EEMLA.bcf" 1740734037.43891 125690 4155d58441e993b3027d98478b79ba44 "pdflatex"
|
||||
"references.bib" 1740733456.80564 3070 ea3b68cbb126530b1a63cc48bb0b269c ""
|
||||
(generated)
|
||||
"EEMLA.bbl"
|
||||
"EEMLA.blg"
|
||||
(rewritten before read)
|
||||
["pdflatex"] 1740733217.68731 "EEMLA.tex" "EEMLA.pdf" "EEMLA" 1740733218.14847 0
|
||||
["pdflatex"] 1740734036.93952 "EEMLA.tex" "EEMLA.pdf" "EEMLA" 1740734037.48071 0
|
||||
"/usr/share/texmf-dist/fonts/enc/dvips/base/8r.enc" 1736268207 4850 80dc9bab7f31fb78a000ccfed0e27cab ""
|
||||
"/usr/share/texmf-dist/fonts/map/fontname/texfonts.map" 1736268207 3524 cb3e574dea2d1052e39280babc910dc8 ""
|
||||
"/usr/share/texmf-dist/fonts/tfm/adobe/times/ptmb7t.tfm" 1736268207 2172 fd0c924230362ff848a33632ed45dc23 ""
|
||||
@@ -89,10 +89,10 @@
|
||||
"/usr/share/texmf-dist/web2c/texmf.cnf" 1736268207 41588 b43d3e860a4f94167ee1e725ff526a72 ""
|
||||
"/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map" 1740110826.09194 5318467 8236939515598d005dfbcc2e1a40f206 ""
|
||||
"/var/lib/texmf/web2c/pdftex/pdflatex.fmt" 1740110801 7112991 1dc785529b0f131eb9fafe88e40ba344 ""
|
||||
"EEMLA.aux" 1740733218.11256 818 607726c98a04d660d760eb9790b6e498 "pdflatex"
|
||||
"EEMLA.bbl" 1740733014.15262 9373 a739baae76801a6eb23a4ae3e4219b4a "biber EEMLA"
|
||||
"EEMLA.run.xml" 1740733218.11417 2437 a20ebb97c6b6df9ec77b4a047cab8b74 "pdflatex"
|
||||
"EEMLA.tex" 1740733217.00648 9156 7874abaeef9e25ad97e500f377e8f691 ""
|
||||
"EEMLA.aux" 1740734037.43362 1194 d65c4da28ffe7bc193abacb534f92591 "pdflatex"
|
||||
"EEMLA.bbl" 1740733772.2618 15689 117eb34aa21a650891b593351c04032d "biber EEMLA"
|
||||
"EEMLA.run.xml" 1740734037.4402 2437 a20ebb97c6b6df9ec77b4a047cab8b74 "pdflatex"
|
||||
"EEMLA.tex" 1740734035.87077 9984 9978488e4ad8fb9a76630f78a2204fe8 ""
|
||||
(generated)
|
||||
"EEMLA.aux"
|
||||
"EEMLA.bcf"
|
||||
|
||||
12
EEMLA.log
12
EEMLA.log
@@ -1,4 +1,4 @@
|
||||
This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024/Arch Linux) (preloaded format=pdflatex 2025.2.20) 28 FEB 2025 03:00
|
||||
This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024/Arch Linux) (preloaded format=pdflatex 2025.2.20) 28 FEB 2025 03:13
|
||||
entering extended mode
|
||||
restricted \write18 enabled.
|
||||
file:line:error style messages enabled.
|
||||
@@ -509,15 +509,15 @@ Package logreq Info: Writing requests to 'EEMLA.run.xml'.
|
||||
|
||||
)
|
||||
Here is how much of TeX's memory you used:
|
||||
10810 strings out of 476076
|
||||
205922 string characters out of 5793775
|
||||
10817 strings out of 476076
|
||||
206378 string characters out of 5793775
|
||||
1933187 words of memory out of 5000000
|
||||
32822 multiletter control sequences out of 15000+600000
|
||||
32829 multiletter control sequences out of 15000+600000
|
||||
566628 words of font info for 54 fonts, out of 8000000 for 9000
|
||||
14 hyphenation exceptions out of 8191
|
||||
72i,11n,81p,1163b,2647s stack positions out of 10000i,1000n,20000p,200000b,200000s
|
||||
72i,11n,81p,1313b,2647s stack positions out of 10000i,1000n,20000p,200000b,200000s
|
||||
</usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmbx12.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmex10.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi12.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi8.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmr12.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmr8.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy10.pfb></usr/share/texmf-dist/fonts/type1/urw/times/utmb8a.pfb></usr/share/texmf-dist/fonts/type1/urw/times/utmr8a.pfb></usr/share/texmf-dist/fonts/type1/urw/times/utmri8a.pfb>
|
||||
Output written on EEMLA.pdf (5 pages, 109428 bytes).
|
||||
Output written on EEMLA.pdf (5 pages, 110014 bytes).
|
||||
PDF statistics:
|
||||
71 PDF objects out of 1000 (max. 8388607)
|
||||
43 compressed objects within 1 object stream
|
||||
|
||||
BIN
EEMLA.synctex.gz
BIN
EEMLA.synctex.gz
Binary file not shown.
@@ -100,7 +100,7 @@ Efforts to increase the performance of LLMs tend to include provisions for an in
|
||||
|
||||
Benchmarks for evaluating Large Language Models (LLMs) assess their performance across various tasks, including reasoning, comprehension, generation, and factual accuracy. Standard benchmarks include GLUE and SuperGLUE for natural language understanding, MMLU (Massive Multitask Language Understanding) for evaluating knowledge across diverse subjects, and BIG-bench for measuring reasoning and generalization capabilities \parencite[8]{ivanov2024}. HELLASWAG and LAMBADA test commonsense reasoning and long-range dependency understanding, while TruthfulQA and BBQ assess biases, factual consistency, and ethical alignment \parencite[6]{ivanov2024}. Additionally, human evaluations and BLEU, ROUGE, and METEOR scores help measure text generation quality. As LLMs advance, new benchmarks continuously emerge to capture nuances in performance, efficiency, and ethical behavior.
|
||||
|
||||
Adding to the complexity of creating increasingly performant models are the computational and capital costs of building AI-capable supercomputers, clusters, and data centers for corpora, or CLM text databases. Improvements in model architecture are sought before attempts to increase the scale of models and their parameter counts because of the prohibitive scaling laws of neural networks.
|
||||
Adding to the complexity of creating increasingly performant models are the computational and capital costs of building AI-capable supercomputers, clusters, and data centers for corpora, or CLM text databases. Improvements in model architecture are sought before attempts to increase the scale of models and their parameter counts because of the prohibitive scaling laws of neural networks. Experimentally, it has been found that increased parameter size has an exponential relationship with FLOPs of computational cost \parencite[2]{hoffmann2022trainingcomputeoptimallargelanguage}. This is seen in relation to the exponentially slowing gain in CLM accuracy with increased compute \parencite[5]{hoffmann2022trainingcomputeoptimallargelanguage}. This is taken to mean that there is a point at which scaling a model to gain accuracy is unsustainable. The Chinchilla scaling law is an empirically derived hypothesis which states that an increase in model scale for a given architecture will tend to reduce model performance as the number of parameters tends to infinity. Although some teams claim to have statistically significant results that disprove it, these results have not been reaffirmed by third parties.
|
||||
|
||||
|
||||
%%%%Works cited
|
||||
|
||||
@@ -32,5 +32,12 @@ DOI = {10.3390/app14020744}
|
||||
primaryClass={cs.DC},
|
||||
url={https://arxiv.org/abs/2412.01020},
|
||||
}
|
||||
|
||||
|
||||
@misc{hoffmann2022trainingcomputeoptimallargelanguage,
|
||||
title={Training Compute-Optimal Large Language Models},
|
||||
author={Jordan Hoffmann and Sebastian Borgeaud and Arthur Mensch and Elena Buchatskaya and Trevor Cai and Eliza Rutherford and Diego de Las Casas and Lisa Anne Hendricks and Johannes Welbl and Aidan Clark and Tom Hennigan and Eric Noland and Katie Millican and George van den Driessche and Bogdan Damoc and Aurelia Guy and Simon Osindero and Karen Simonyan and Erich Elsen and Jack W. Rae and Oriol Vinyals and Laurent Sifre},
|
||||
year={2022},
|
||||
eprint={2203.15556},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.CL},
|
||||
url={https://arxiv.org/abs/2203.15556},
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user