cooked
This commit is contained in:
17
EEMLA.aux
17
EEMLA.aux
@@ -20,11 +20,12 @@
|
||||
\abx@aux@segm{0}{0}{hendrycks2021measuringmathematicalproblemsolving}
|
||||
\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Comparison of LLM Sizes and Their Computational Requirements}}{5}{}\protected@file@percent }
|
||||
\newlabel{tab:model-sizes}{{1}{5}{}{table.1}{}}
|
||||
\abx@aux@read@bbl@mdfivesum{C2B7A906B5BBD5BA46851A4078C3BC58}
|
||||
\abx@aux@defaultrefcontext{0}{hendrycks2021measuringmathematicalproblemsolving}{nty/global//global/global/global}
|
||||
\abx@aux@defaultrefcontext{0}{hoffmann2022trainingcomputeoptimallargelanguage}{nty/global//global/global/global}
|
||||
\abx@aux@defaultrefcontext{0}{ivanov2024}{nty/global//global/global/global}
|
||||
\abx@aux@defaultrefcontext{0}{app14020744}{nty/global//global/global/global}
|
||||
\abx@aux@defaultrefcontext{0}{vaswani2023attentionneed}{nty/global//global/global/global}
|
||||
\abx@aux@defaultrefcontext{0}{Wang2024}{nty/global//global/global/global}
|
||||
\gdef \@abspage@last{7}
|
||||
\abx@aux@cite{0}{ahn2024largelanguagemodelsmathematical}
|
||||
\abx@aux@segm{0}{0}{ahn2024largelanguagemodelsmathematical}
|
||||
\abx@aux@cite{0}{cobbe2021trainingverifierssolvemath}
|
||||
\abx@aux@segm{0}{0}{cobbe2021trainingverifierssolvemath}
|
||||
\abx@aux@cite{0}{cobbe2021trainingverifierssolvemath}
|
||||
\abx@aux@segm{0}{0}{cobbe2021trainingverifierssolvemath}
|
||||
\abx@aux@read@bbl@mdfivesum{nohash}
|
||||
\abx@aux@read@bblrerun
|
||||
\gdef \@abspage@last{8}
|
||||
|
||||
482
EEMLA.bbl
482
EEMLA.bbl
@@ -1,482 +0,0 @@
|
||||
% $ biblatex auxiliary file $
|
||||
% $ biblatex bbl format version 3.3 $
|
||||
% Do not modify the above lines!
|
||||
%
|
||||
% This is an auxiliary file used by the 'biblatex' package.
|
||||
% This file may safely be deleted. It will be recreated by
|
||||
% biber as required.
|
||||
%
|
||||
\begingroup
|
||||
\makeatletter
|
||||
\@ifundefined{ver@biblatex.sty}
|
||||
{\@latex@error
|
||||
{Missing 'biblatex' package}
|
||||
{The bibliography requires the 'biblatex' package.}
|
||||
\aftergroup\endinput}
|
||||
{}
|
||||
\endgroup
|
||||
|
||||
|
||||
\refsection{0}
|
||||
\datalist[entry]{nty/global//global/global/global}
|
||||
\entry{hendrycks2021measuringmathematicalproblemsolving}{misc}{}{}
|
||||
\name{author}{8}{}{%
|
||||
{{un=0,uniquepart=base,hash=86d0b4ecd6b6066d49e7aecde6e5e630}{%
|
||||
family={Hendrycks},
|
||||
familyi={H\bibinitperiod},
|
||||
given={Dan},
|
||||
giveni={D\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=624605ebd8b6b403b251522e3d1029ff}{%
|
||||
family={Burns},
|
||||
familyi={B\bibinitperiod},
|
||||
given={Collin},
|
||||
giveni={C\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=ae50339c3e9c1418ade64e74fa002ef5}{%
|
||||
family={Kadavath},
|
||||
familyi={K\bibinitperiod},
|
||||
given={Saurav},
|
||||
giveni={S\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=a3e8b92acb1bf8b2b186f6b10d4ae704}{%
|
||||
family={Arora},
|
||||
familyi={A\bibinitperiod},
|
||||
given={Akul},
|
||||
giveni={A\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=35f72302a7aebfd095414ce218b8fb5d}{%
|
||||
family={Basart},
|
||||
familyi={B\bibinitperiod},
|
||||
given={Steven},
|
||||
giveni={S\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=ffd1907ef0a8bbed89fa04d783f236b6}{%
|
||||
family={Tang},
|
||||
familyi={T\bibinitperiod},
|
||||
given={Eric},
|
||||
giveni={E\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=d458d5f9b64652055bd81d57e4777b22}{%
|
||||
family={Song},
|
||||
familyi={S\bibinitperiod},
|
||||
given={Dawn},
|
||||
giveni={D\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=fe7a7e80c1857d185d3ab01f15fe584d}{%
|
||||
family={Steinhardt},
|
||||
familyi={S\bibinitperiod},
|
||||
given={Jacob},
|
||||
giveni={J\bibinitperiod},
|
||||
givenun=0}}%
|
||||
}
|
||||
\strng{namehash}{f461c117afa272374439d2db3ee67282}
|
||||
\strng{fullhash}{821f0c67ea3dbc37f0422b8c2bda174e}
|
||||
\strng{fullhashraw}{821f0c67ea3dbc37f0422b8c2bda174e}
|
||||
\strng{bibnamehash}{f461c117afa272374439d2db3ee67282}
|
||||
\strng{authorbibnamehash}{f461c117afa272374439d2db3ee67282}
|
||||
\strng{authornamehash}{f461c117afa272374439d2db3ee67282}
|
||||
\strng{authorfullhash}{821f0c67ea3dbc37f0422b8c2bda174e}
|
||||
\strng{authorfullhashraw}{821f0c67ea3dbc37f0422b8c2bda174e}
|
||||
\field{sortinit}{H}
|
||||
\field{sortinithash}{23a3aa7c24e56cfa16945d55545109b5}
|
||||
\true{singletitle}
|
||||
\field{labelnamesource}{author}
|
||||
\field{labeltitlesource}{title}
|
||||
\field{eprintclass}{cs.LG}
|
||||
\field{eprinttype}{arXiv}
|
||||
\field{title}{Measuring Mathematical Problem Solving With the MATH Dataset}
|
||||
\field{year}{2021}
|
||||
\verb{eprint}
|
||||
\verb 2103.03874
|
||||
\endverb
|
||||
\verb{urlraw}
|
||||
\verb arxiv.org/abs/2103.03874
|
||||
\endverb
|
||||
\verb{url}
|
||||
\verb arxiv.org/abs/2103.03874
|
||||
\endverb
|
||||
\endentry
|
||||
\entry{hoffmann2022trainingcomputeoptimallargelanguage}{misc}{}{}
|
||||
\name{author}{22}{}{%
|
||||
{{un=0,uniquepart=base,hash=92d226c4ea870c0e9df2ceb8bf03d8ff}{%
|
||||
family={Hoffmann},
|
||||
familyi={H\bibinitperiod},
|
||||
given={Jordan},
|
||||
giveni={J\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=c28d74501d531d6672ccf4c28016c7ac}{%
|
||||
family={Borgeaud},
|
||||
familyi={B\bibinitperiod},
|
||||
given={Sebastian},
|
||||
giveni={S\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=e9a587c57b1dcb4834850800386d8aca}{%
|
||||
family={Mensch},
|
||||
familyi={M\bibinitperiod},
|
||||
given={Arthur},
|
||||
giveni={A\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=eee8c4b85d5c4e1f4bc70218d34ba69d}{%
|
||||
family={Buchatskaya},
|
||||
familyi={B\bibinitperiod},
|
||||
given={Elena},
|
||||
giveni={E\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=3d7a83ed6eb983ca17cec804631dc22e}{%
|
||||
family={Cai},
|
||||
familyi={C\bibinitperiod},
|
||||
given={Trevor},
|
||||
giveni={T\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=5cc07c10e9f1ef5fae367662553d982c}{%
|
||||
family={Rutherford},
|
||||
familyi={R\bibinitperiod},
|
||||
given={Eliza},
|
||||
giveni={E\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=47f81e65396e1d91b7ab70d59f69329b}{%
|
||||
family={Las\bibnamedelima Casas},
|
||||
familyi={L\bibinitperiod\bibinitdelim C\bibinitperiod},
|
||||
given={Diego},
|
||||
giveni={D\bibinitperiod},
|
||||
givenun=0,
|
||||
prefix={de},
|
||||
prefixi={d\bibinitperiod}}}%
|
||||
{{un=0,uniquepart=base,hash=68b917d11d355ed641ee448751fb3ba5}{%
|
||||
family={Hendricks},
|
||||
familyi={H\bibinitperiod},
|
||||
given={Lisa\bibnamedelima Anne},
|
||||
giveni={L\bibinitperiod\bibinitdelim A\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=7a7fdb4cb72d04e1eaa8a6a2ca0358dc}{%
|
||||
family={Welbl},
|
||||
familyi={W\bibinitperiod},
|
||||
given={Johannes},
|
||||
giveni={J\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=b06242c53973106520ab19f8e1fc3799}{%
|
||||
family={Clark},
|
||||
familyi={C\bibinitperiod},
|
||||
given={Aidan},
|
||||
giveni={A\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=0c1f2b1cf15f9e15216a9e652512bf6f}{%
|
||||
family={Hennigan},
|
||||
familyi={H\bibinitperiod},
|
||||
given={Tom},
|
||||
giveni={T\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=70b3914180795f8c87317cbd0599c942}{%
|
||||
family={Noland},
|
||||
familyi={N\bibinitperiod},
|
||||
given={Eric},
|
||||
giveni={E\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=da626c813aa81825001015989a23f850}{%
|
||||
family={Millican},
|
||||
familyi={M\bibinitperiod},
|
||||
given={Katie},
|
||||
giveni={K\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=54c5c572d4fdc5d822c240b59fcadad4}{%
|
||||
family={Driessche},
|
||||
familyi={D\bibinitperiod},
|
||||
given={George},
|
||||
giveni={G\bibinitperiod},
|
||||
givenun=0,
|
||||
prefix={van\bibnamedelima den},
|
||||
prefixi={v\bibinitperiod\bibinitdelim d\bibinitperiod}}}%
|
||||
{{un=0,uniquepart=base,hash=4da5c824138e58f9def8fd792b5ad94f}{%
|
||||
family={Damoc},
|
||||
familyi={D\bibinitperiod},
|
||||
given={Bogdan},
|
||||
giveni={B\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=8bc2c63cf1c4c6e9e4d317839dda4de2}{%
|
||||
family={Guy},
|
||||
familyi={G\bibinitperiod},
|
||||
given={Aurelia},
|
||||
giveni={A\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=643f92e8f89f2746a4c1aa077d225755}{%
|
||||
family={Osindero},
|
||||
familyi={O\bibinitperiod},
|
||||
given={Simon},
|
||||
giveni={S\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=9d16b7284df92c9adaee86c37ab992df}{%
|
||||
family={Simonyan},
|
||||
familyi={S\bibinitperiod},
|
||||
given={Karen},
|
||||
giveni={K\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=6aa92a937d4d30dd0b5ec0eecbad1bf1}{%
|
||||
family={Elsen},
|
||||
familyi={E\bibinitperiod},
|
||||
given={Erich},
|
||||
giveni={E\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=8dcdeb16ef3c68cf8396226668804fe0}{%
|
||||
family={Rae},
|
||||
familyi={R\bibinitperiod},
|
||||
given={Jack\bibnamedelima W.},
|
||||
giveni={J\bibinitperiod\bibinitdelim W\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=494b568c5dc85ba8f3f409635f9c5f25}{%
|
||||
family={Vinyals},
|
||||
familyi={V\bibinitperiod},
|
||||
given={Oriol},
|
||||
giveni={O\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=50d24de916599d306c5cb1a77156e4b9}{%
|
||||
family={Sifre},
|
||||
familyi={S\bibinitperiod},
|
||||
given={Laurent},
|
||||
giveni={L\bibinitperiod},
|
||||
givenun=0}}%
|
||||
}
|
||||
\strng{namehash}{8be79936575bb0d5b77c96da6d2a6d52}
|
||||
\strng{fullhash}{fb6d92421541c28738787855a47da527}
|
||||
\strng{fullhashraw}{fb6d92421541c28738787855a47da527}
|
||||
\strng{bibnamehash}{8be79936575bb0d5b77c96da6d2a6d52}
|
||||
\strng{authorbibnamehash}{8be79936575bb0d5b77c96da6d2a6d52}
|
||||
\strng{authornamehash}{8be79936575bb0d5b77c96da6d2a6d52}
|
||||
\strng{authorfullhash}{fb6d92421541c28738787855a47da527}
|
||||
\strng{authorfullhashraw}{fb6d92421541c28738787855a47da527}
|
||||
\field{sortinit}{H}
|
||||
\field{sortinithash}{23a3aa7c24e56cfa16945d55545109b5}
|
||||
\true{singletitle}
|
||||
\field{labelnamesource}{author}
|
||||
\field{labeltitlesource}{title}
|
||||
\field{eprintclass}{cs.CL}
|
||||
\field{eprinttype}{arXiv}
|
||||
\field{title}{Training Compute-Optimal Large Language Models}
|
||||
\field{year}{2022}
|
||||
\verb{eprint}
|
||||
\verb 2203.15556
|
||||
\endverb
|
||||
\verb{urlraw}
|
||||
\verb arxiv.org/abs/2203.15556
|
||||
\endverb
|
||||
\verb{url}
|
||||
\verb arxiv.org/abs/2203.15556
|
||||
\endverb
|
||||
\endentry
|
||||
\entry{ivanov2024}{misc}{}{}
|
||||
\name{author}{2}{}{%
|
||||
{{un=0,uniquepart=base,hash=1935b6f0043d4bac823842ff5d478faf}{%
|
||||
family={Ivanov},
|
||||
familyi={I\bibinitperiod},
|
||||
given={Todor},
|
||||
giveni={T\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=495d2036e580863cb79ad80a7bb2fc78}{%
|
||||
family={Penchev},
|
||||
familyi={P\bibinitperiod},
|
||||
given={Valeri},
|
||||
giveni={V\bibinitperiod},
|
||||
givenun=0}}%
|
||||
}
|
||||
\strng{namehash}{259e51507bbcab245b7267c088c1998f}
|
||||
\strng{fullhash}{259e51507bbcab245b7267c088c1998f}
|
||||
\strng{fullhashraw}{259e51507bbcab245b7267c088c1998f}
|
||||
\strng{bibnamehash}{259e51507bbcab245b7267c088c1998f}
|
||||
\strng{authorbibnamehash}{259e51507bbcab245b7267c088c1998f}
|
||||
\strng{authornamehash}{259e51507bbcab245b7267c088c1998f}
|
||||
\strng{authorfullhash}{259e51507bbcab245b7267c088c1998f}
|
||||
\strng{authorfullhashraw}{259e51507bbcab245b7267c088c1998f}
|
||||
\field{sortinit}{I}
|
||||
\field{sortinithash}{8d291c51ee89b6cd86bf5379f0b151d8}
|
||||
\true{singletitle}
|
||||
\field{labelnamesource}{author}
|
||||
\field{labeltitlesource}{title}
|
||||
\field{eprintclass}{cs.DC}
|
||||
\field{eprinttype}{arXiv}
|
||||
\field{title}{AI Benchmarks and Datasets for LLM Evaluation}
|
||||
\field{year}{2024}
|
||||
\verb{eprint}
|
||||
\verb 2412.01020
|
||||
\endverb
|
||||
\verb{urlraw}
|
||||
\verb arxiv.org/abs/2412.01020
|
||||
\endverb
|
||||
\verb{url}
|
||||
\verb arxiv.org/abs/2412.01020
|
||||
\endverb
|
||||
\endentry
|
||||
\entry{app14020744}{article}{}{}
|
||||
\name{author}{1}{}{%
|
||||
{{un=0,uniquepart=base,hash=d8c43e5429158fe51408ffa847a4a856}{%
|
||||
family={Testolin},
|
||||
familyi={T\bibinitperiod},
|
||||
given={Alberto},
|
||||
giveni={A\bibinitperiod},
|
||||
givenun=0}}%
|
||||
}
|
||||
\strng{namehash}{d8c43e5429158fe51408ffa847a4a856}
|
||||
\strng{fullhash}{d8c43e5429158fe51408ffa847a4a856}
|
||||
\strng{fullhashraw}{d8c43e5429158fe51408ffa847a4a856}
|
||||
\strng{bibnamehash}{d8c43e5429158fe51408ffa847a4a856}
|
||||
\strng{authorbibnamehash}{d8c43e5429158fe51408ffa847a4a856}
|
||||
\strng{authornamehash}{d8c43e5429158fe51408ffa847a4a856}
|
||||
\strng{authorfullhash}{d8c43e5429158fe51408ffa847a4a856}
|
||||
\strng{authorfullhashraw}{d8c43e5429158fe51408ffa847a4a856}
|
||||
\field{sortinit}{T}
|
||||
\field{sortinithash}{9af77f0292593c26bde9a56e688eaee9}
|
||||
\true{singletitle}
|
||||
\field{labelnamesource}{author}
|
||||
\field{labeltitlesource}{title}
|
||||
\field{abstract}{Creating learning models that can exhibit sophisticated reasoning abilities is one of the greatest challenges in deep learning research, and mathematics is rapidly becoming one of the target domains for assessing scientific progress in this direction. In the past few years there has been an explosion of neural network architectures, datasets, and benchmarks specifically designed to tackle mathematical problems, reporting impressive achievements in disparate fields such as automated theorem proving, numerical integration, and the discovery of new conjectures or matrix multiplication algorithms. However, despite this notable success it is still unclear whether deep learning models possess an elementary understanding of quantities and numbers. This survey critically examines the recent literature, concluding that even state-of-the-art architectures and large language models often fall short when probed with relatively simple tasks designed to test basic numerical and arithmetic knowledge.}
|
||||
\field{issn}{2076-3417}
|
||||
\field{journaltitle}{Applied Sciences}
|
||||
\field{number}{2}
|
||||
\field{title}{Can Neural Networks Do Arithmetic? A Survey on the Elementary Numerical Skills of State-of-the-Art Deep Learning Models}
|
||||
\field{volume}{14}
|
||||
\field{year}{2024}
|
||||
\verb{doi}
|
||||
\verb 10.3390/app14020744
|
||||
\endverb
|
||||
\endentry
|
||||
\entry{vaswani2023attentionneed}{misc}{}{}
|
||||
\name{author}{8}{}{%
|
||||
{{un=0,uniquepart=base,hash=7f28e84700536646dd6620a0db07ad09}{%
|
||||
family={Vaswani},
|
||||
familyi={V\bibinitperiod},
|
||||
given={Ashish},
|
||||
giveni={A\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=62efade83d70f0323fe248755e6c90c5}{%
|
||||
family={Shazeer},
|
||||
familyi={S\bibinitperiod},
|
||||
given={Noam},
|
||||
giveni={N\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=06649ebab1ea5cac0250746a19764975}{%
|
||||
family={Parmar},
|
||||
familyi={P\bibinitperiod},
|
||||
given={Niki},
|
||||
giveni={N\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=831027ee0ebf22375e2a86afc1881909}{%
|
||||
family={Uszkoreit},
|
||||
familyi={U\bibinitperiod},
|
||||
given={Jakob},
|
||||
giveni={J\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=2fd2982e30ebcec93ec1cf76e0d797fd}{%
|
||||
family={Jones},
|
||||
familyi={J\bibinitperiod},
|
||||
given={Llion},
|
||||
giveni={L\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=27b07e4eacbf4ef7a1438e3badb7dd8d}{%
|
||||
family={Gomez},
|
||||
familyi={G\bibinitperiod},
|
||||
given={Aidan\bibnamedelima N.},
|
||||
giveni={A\bibinitperiod\bibinitdelim N\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=f2bc899b1160163417da7bf510f15d33}{%
|
||||
family={Kaiser},
|
||||
familyi={K\bibinitperiod},
|
||||
given={Lukasz},
|
||||
giveni={L\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=95595a0fefb86187cbc36e551017d332}{%
|
||||
family={Polosukhin},
|
||||
familyi={P\bibinitperiod},
|
||||
given={Illia},
|
||||
giveni={I\bibinitperiod},
|
||||
givenun=0}}%
|
||||
}
|
||||
\strng{namehash}{ee273ab30cfb889666f8c4d806eb9ce7}
|
||||
\strng{fullhash}{f82970bbd2bdd7a002d2af62b743d5cc}
|
||||
\strng{fullhashraw}{f82970bbd2bdd7a002d2af62b743d5cc}
|
||||
\strng{bibnamehash}{ee273ab30cfb889666f8c4d806eb9ce7}
|
||||
\strng{authorbibnamehash}{ee273ab30cfb889666f8c4d806eb9ce7}
|
||||
\strng{authornamehash}{ee273ab30cfb889666f8c4d806eb9ce7}
|
||||
\strng{authorfullhash}{f82970bbd2bdd7a002d2af62b743d5cc}
|
||||
\strng{authorfullhashraw}{f82970bbd2bdd7a002d2af62b743d5cc}
|
||||
\field{sortinit}{V}
|
||||
\field{sortinithash}{afb52128e5b4dc4b843768c0113d673b}
|
||||
\true{singletitle}
|
||||
\field{labelnamesource}{author}
|
||||
\field{labeltitlesource}{title}
|
||||
\field{eprintclass}{cs.CL}
|
||||
\field{eprinttype}{arXiv}
|
||||
\field{title}{Attention Is All You Need}
|
||||
\field{year}{2023}
|
||||
\verb{eprint}
|
||||
\verb 1706.03762
|
||||
\endverb
|
||||
\verb{urlraw}
|
||||
\verb arxiv.org/abs/1706.03762
|
||||
\endverb
|
||||
\verb{url}
|
||||
\verb arxiv.org/abs/1706.03762
|
||||
\endverb
|
||||
\endentry
|
||||
\entry{Wang2024}{article}{}{}
|
||||
\name{author}{6}{}{%
|
||||
{{un=0,uniquepart=base,hash=7cca10cee48e9c197439e4af610acfe5}{%
|
||||
family={Wang},
|
||||
familyi={W\bibinitperiod},
|
||||
given={Zichong},
|
||||
giveni={Z\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=bc41c1f771e246b6d8ce744badb182e3}{%
|
||||
family={Chu},
|
||||
familyi={C\bibinitperiod},
|
||||
given={Zhibo},
|
||||
giveni={Z\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=82b0035db67db8bd400c34e8a5eec07b}{%
|
||||
family={Doan},
|
||||
familyi={D\bibinitperiod},
|
||||
given={Thang\bibnamedelima Viet},
|
||||
giveni={T\bibinitperiod\bibinitdelim V\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=852b650254c75a15c1024df13b29189c}{%
|
||||
family={Ni},
|
||||
familyi={N\bibinitperiod},
|
||||
given={Shiwen},
|
||||
giveni={S\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=feb96ca112c179e320db2db693e022b8}{%
|
||||
family={Yang},
|
||||
familyi={Y\bibinitperiod},
|
||||
given={Min},
|
||||
giveni={M\bibinitperiod},
|
||||
givenun=0}}%
|
||||
{{un=0,uniquepart=base,hash=dd4baede28b306ab6d37dd79d89a935b}{%
|
||||
family={Zhang},
|
||||
familyi={Z\bibinitperiod},
|
||||
given={Wenbin},
|
||||
giveni={W\bibinitperiod},
|
||||
givenun=0}}%
|
||||
}
|
||||
\strng{namehash}{1c3e58e991d8f7a6ae5aee1e95c5cd8a}
|
||||
\strng{fullhash}{391aec39c1c26e8d5e7517c1ab227456}
|
||||
\strng{fullhashraw}{391aec39c1c26e8d5e7517c1ab227456}
|
||||
\strng{bibnamehash}{1c3e58e991d8f7a6ae5aee1e95c5cd8a}
|
||||
\strng{authorbibnamehash}{1c3e58e991d8f7a6ae5aee1e95c5cd8a}
|
||||
\strng{authornamehash}{1c3e58e991d8f7a6ae5aee1e95c5cd8a}
|
||||
\strng{authorfullhash}{391aec39c1c26e8d5e7517c1ab227456}
|
||||
\strng{authorfullhashraw}{391aec39c1c26e8d5e7517c1ab227456}
|
||||
\field{sortinit}{W}
|
||||
\field{sortinithash}{4315d78024d0cea9b57a0c6f0e35ed0d}
|
||||
\true{singletitle}
|
||||
\field{labelnamesource}{author}
|
||||
\field{labeltitlesource}{title}
|
||||
\field{journaltitle}{AI and Ethics}
|
||||
\field{month}{10}
|
||||
\field{title}{History, development, and principles of large language models: An introductory survey}
|
||||
\field{year}{2024}
|
||||
\verb{doi}
|
||||
\verb 10.1007/s43681-024-00583-7
|
||||
\endverb
|
||||
\endentry
|
||||
\enddatalist
|
||||
\endrefsection
|
||||
\endinput
|
||||
|
||||
@@ -2784,6 +2784,9 @@
|
||||
<bcf:citekey order="7" intorder="1">hoffmann2022trainingcomputeoptimallargelanguage</bcf:citekey>
|
||||
<bcf:citekey order="8" intorder="1">hoffmann2022trainingcomputeoptimallargelanguage</bcf:citekey>
|
||||
<bcf:citekey order="9" intorder="1">hendrycks2021measuringmathematicalproblemsolving</bcf:citekey>
|
||||
<bcf:citekey order="10" intorder="1">ahn2024largelanguagemodelsmathematical</bcf:citekey>
|
||||
<bcf:citekey order="11" intorder="1">cobbe2021trainingverifierssolvemath</bcf:citekey>
|
||||
<bcf:citekey order="12" intorder="1">cobbe2021trainingverifierssolvemath</bcf:citekey>
|
||||
</bcf:section>
|
||||
<!-- SORTING TEMPLATES -->
|
||||
<bcf:sortingtemplate name="nty">
|
||||
|
||||
18
EEMLA.blg
18
EEMLA.blg
@@ -1,17 +1,5 @@
|
||||
[0] Config.pm:308> INFO - This is Biber 2.20
|
||||
[0] Config.pm:311> INFO - Logfile is 'EEMLA.blg'
|
||||
[41] biber:340> INFO - === Tue Apr 29, 2025, 20:53:51
|
||||
[49] Biber.pm:420> INFO - Reading 'EEMLA.bcf'
|
||||
[85] Biber.pm:994> INFO - Found 6 citekeys in bib section 0
|
||||
[93] Biber.pm:4463> INFO - Processing section 0
|
||||
[98] Biber.pm:4654> INFO - Looking for bibtex file 'references.bib' for section 0
|
||||
[98] bibtex.pm:1713> INFO - LaTeX decoding ...
|
||||
[102] bibtex.pm:1519> INFO - Found BibTeX data source 'references.bib'
|
||||
[160] UCollate.pm:68> INFO - Overriding locale 'en-US' defaults 'variable = shifted' with 'variable = non-ignorable'
|
||||
[160] UCollate.pm:68> INFO - Overriding locale 'en-US' defaults 'normalization = NFD' with 'normalization = prenormalized'
|
||||
[160] Biber.pm:4283> INFO - Sorting list 'nty/global//global/global/global' of type 'entry' with template 'nty' and locale 'en-US'
|
||||
[160] Biber.pm:4289> INFO - No sort tailoring available for locale 'en-US'
|
||||
[165] bbl.pm:676> INFO - Writing 'EEMLA.bbl' with encoding 'UTF-8'
|
||||
[168] bbl.pm:779> INFO - Output to EEMLA.bbl
|
||||
[169] Biber.pm:131> WARN - legacy month field 'Oct' in entry 'Wang2024' is not an integer - this will probably not sort properly.
|
||||
[169] Biber.pm:133> INFO - WARNINGS: 1
|
||||
[39] biber:340> INFO - === Tue Apr 29, 2025, 22:30:03
|
||||
[47] Utils.pm:410> ERROR - EEMLA.bcf is malformed, last biblatex run probably failed. Deleted EEMLA.bbl
|
||||
[47] Biber.pm:136> INFO - ERRORS: 1
|
||||
|
||||
262
EEMLA.log
262
EEMLA.log
@@ -1,4 +1,4 @@
|
||||
This is pdfTeX, Version 3.141592653-2.6-1.40.27 (TeX Live 2026/dev/Arch Linux) (preloaded format=pdflatex 2025.3.24) 29 APR 2025 20:53
|
||||
This is pdfTeX, Version 3.141592653-2.6-1.40.27 (TeX Live 2026/dev/Arch Linux) (preloaded format=pdflatex 2025.4.29) 29 APR 2025 22:30
|
||||
entering extended mode
|
||||
restricted \write18 enabled.
|
||||
%&-line parsing enabled.
|
||||
@@ -404,33 +404,101 @@ Package: float 2001/11/08 v1.3d Float enhancements (AL)
|
||||
\@float@everytoks=\toks22
|
||||
\@floatcapt=\box55
|
||||
)
|
||||
(/usr/share/texmf-dist/tex/latex/amsmath/amsmath.sty
|
||||
Package: amsmath 2024/11/05 v2.17t AMS math features
|
||||
\@mathmargin=\skip89
|
||||
|
||||
For additional information on amsmath, use the `?' option.
|
||||
(/usr/share/texmf-dist/tex/latex/amsmath/amstext.sty
|
||||
Package: amstext 2021/08/26 v2.01 AMS text
|
||||
|
||||
(/usr/share/texmf-dist/tex/latex/amsmath/amsgen.sty
|
||||
File: amsgen.sty 1999/11/30 v2.0 generic functions
|
||||
\@emptytoks=\toks23
|
||||
\ex@=\dimen152
|
||||
))
|
||||
(/usr/share/texmf-dist/tex/latex/amsmath/amsbsy.sty
|
||||
Package: amsbsy 1999/11/29 v1.2d Bold Symbols
|
||||
\pmbraise@=\dimen153
|
||||
)
|
||||
(/usr/share/texmf-dist/tex/latex/amsmath/amsopn.sty
|
||||
Package: amsopn 2022/04/08 v2.04 operator names
|
||||
)
|
||||
\inf@bad=\count404
|
||||
LaTeX Info: Redefining \frac on input line 233.
|
||||
\uproot@=\count405
|
||||
\leftroot@=\count406
|
||||
LaTeX Info: Redefining \overline on input line 398.
|
||||
LaTeX Info: Redefining \colon on input line 409.
|
||||
\classnum@=\count407
|
||||
\DOTSCASE@=\count408
|
||||
LaTeX Info: Redefining \ldots on input line 495.
|
||||
LaTeX Info: Redefining \dots on input line 498.
|
||||
LaTeX Info: Redefining \cdots on input line 619.
|
||||
\Mathstrutbox@=\box56
|
||||
\strutbox@=\box57
|
||||
LaTeX Info: Redefining \big on input line 721.
|
||||
LaTeX Info: Redefining \Big on input line 722.
|
||||
LaTeX Info: Redefining \bigg on input line 723.
|
||||
LaTeX Info: Redefining \Bigg on input line 724.
|
||||
\big@size=\dimen154
|
||||
LaTeX Font Info: Redeclaring font encoding OML on input line 742.
|
||||
LaTeX Font Info: Redeclaring font encoding OMS on input line 743.
|
||||
\macc@depth=\count409
|
||||
LaTeX Info: Redefining \bmod on input line 904.
|
||||
LaTeX Info: Redefining \pmod on input line 909.
|
||||
LaTeX Info: Redefining \smash on input line 939.
|
||||
LaTeX Info: Redefining \relbar on input line 969.
|
||||
LaTeX Info: Redefining \Relbar on input line 970.
|
||||
\c@MaxMatrixCols=\count410
|
||||
\dotsspace@=\muskip22
|
||||
\c@parentequation=\count411
|
||||
\dspbrk@lvl=\count412
|
||||
\tag@help=\toks24
|
||||
\row@=\count413
|
||||
\column@=\count414
|
||||
\maxfields@=\count415
|
||||
\andhelp@=\toks25
|
||||
\eqnshift@=\dimen155
|
||||
\alignsep@=\dimen156
|
||||
\tagshift@=\dimen157
|
||||
\tagwidth@=\dimen158
|
||||
\totwidth@=\dimen159
|
||||
\lineht@=\dimen160
|
||||
\@envbody=\toks26
|
||||
\multlinegap=\skip90
|
||||
\multlinetaggap=\skip91
|
||||
\mathdisplay@stack=\toks27
|
||||
LaTeX Info: Redefining \[ on input line 2953.
|
||||
LaTeX Info: Redefining \] on input line 2954.
|
||||
)
|
||||
|
||||
Package biblatex Warning: Missing 'hyperref' package.
|
||||
(biblatex) Setting 'hyperref=false'.
|
||||
|
||||
\@quotelevel=\count404
|
||||
\@quotereset=\count405
|
||||
\@quotelevel=\count416
|
||||
\@quotereset=\count417
|
||||
LaTeX Font Info: Trying to load font information for OT1+ptm on input line 5
|
||||
6.
|
||||
7.
|
||||
(/usr/share/texmf-dist/tex/latex/psnfss/ot1ptm.fd
|
||||
File: ot1ptm.fd 2001/06/04 font definitions for OT1/ptm.
|
||||
) (./EEMLA.aux)
|
||||
\openout1 = `EEMLA.aux'.
|
||||
|
||||
LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 56.
|
||||
LaTeX Font Info: ... okay on input line 56.
|
||||
LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 56.
|
||||
LaTeX Font Info: ... okay on input line 56.
|
||||
LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 56.
|
||||
LaTeX Font Info: ... okay on input line 56.
|
||||
LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 56.
|
||||
LaTeX Font Info: ... okay on input line 56.
|
||||
LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 56.
|
||||
LaTeX Font Info: ... okay on input line 56.
|
||||
LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 56.
|
||||
LaTeX Font Info: ... okay on input line 56.
|
||||
LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 56.
|
||||
LaTeX Font Info: ... okay on input line 56.
|
||||
LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 57.
|
||||
LaTeX Font Info: ... okay on input line 57.
|
||||
LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 57.
|
||||
LaTeX Font Info: ... okay on input line 57.
|
||||
LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 57.
|
||||
LaTeX Font Info: ... okay on input line 57.
|
||||
LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 57.
|
||||
LaTeX Font Info: ... okay on input line 57.
|
||||
LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 57.
|
||||
LaTeX Font Info: ... okay on input line 57.
|
||||
LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 57.
|
||||
LaTeX Font Info: ... okay on input line 57.
|
||||
LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 57.
|
||||
LaTeX Font Info: ... okay on input line 57.
|
||||
|
||||
*geometry* driver: auto-detecting
|
||||
*geometry* detected driver: pdftex
|
||||
@@ -488,39 +556,47 @@ Package biblatex Info: Automatic encoding selection.
|
||||
\openout3 = `EEMLA.bcf'.
|
||||
|
||||
Package biblatex Info: Trying to load bibliographic data...
|
||||
Package biblatex Info: ... file 'EEMLA.bbl' found.
|
||||
(./EEMLA.bbl)
|
||||
Package biblatex Info: Reference section=0 on input line 56.
|
||||
Package biblatex Info: Reference segment=0 on input line 56.
|
||||
Package biblatex Info: ... file 'EEMLA.bbl' not found.
|
||||
|
||||
No file EEMLA.bbl.
|
||||
Package biblatex Info: Reference section=0 on input line 57.
|
||||
Package biblatex Info: Reference segment=0 on input line 57.
|
||||
(/usr/share/texmf-dist/tex/context/base/mkii/supp-pdf.mkii
|
||||
[Loading MPS to PDF converter (version 2006.09.02).]
|
||||
\scratchcounter=\count406
|
||||
\scratchdimen=\dimen152
|
||||
\scratchbox=\box56
|
||||
\nofMPsegments=\count407
|
||||
\nofMParguments=\count408
|
||||
\everyMPshowfont=\toks23
|
||||
\MPscratchCnt=\count409
|
||||
\MPscratchDim=\dimen153
|
||||
\MPnumerator=\count410
|
||||
\makeMPintoPDFobject=\count411
|
||||
\everyMPtoPDFconversion=\toks24
|
||||
\scratchcounter=\count418
|
||||
\scratchdimen=\dimen161
|
||||
\scratchbox=\box58
|
||||
\nofMPsegments=\count419
|
||||
\nofMParguments=\count420
|
||||
\everyMPshowfont=\toks28
|
||||
\MPscratchCnt=\count421
|
||||
\MPscratchDim=\dimen162
|
||||
\MPnumerator=\count422
|
||||
\makeMPintoPDFobject=\count423
|
||||
\everyMPtoPDFconversion=\toks29
|
||||
) (/usr/share/texmf-dist/tex/latex/epstopdf-pkg/epstopdf-base.sty
|
||||
Package: epstopdf-base 2020-01-24 v2.11 Base part for package epstopdf
|
||||
|
||||
(/usr/share/texmf-dist/tex/latex/grfext/grfext.sty
|
||||
Package: grfext 2019/12/03 v1.3 Manage graphics extensions (HO)
|
||||
|
||||
(/usr/share/texmf-dist/tex/generic/kvdefinekeys/kvdefinekeys.sty
|
||||
Package: kvdefinekeys 2019-12-19 v1.6 Define keys (HO)
|
||||
))
|
||||
Package epstopdf-base Info: Redefining graphics rule for `.eps' on input line 4
|
||||
85.
|
||||
Package grfext Info: Graphics extension search list:
|
||||
(grfext) [.pdf,.png,.jpg,.mps,.jpeg,.jbig2,.jb2,.PDF,.PNG,.JPG,.JPE
|
||||
G,.JBIG2,.JB2,.eps]
|
||||
(grfext) \AppendGraphicsExtensions on input line 504.
|
||||
|
||||
(/usr/share/texmf-dist/tex/latex/latexconfig/epstopdf-sys.cfg
|
||||
File: epstopdf-sys.cfg 2010/07/13 v1.3 Configuration of (r)epstopdf for TeX Liv
|
||||
e
|
||||
))
|
||||
LaTeX Font Info: External font `cmex10' loaded for size
|
||||
(Font) <12> on input line 81.
|
||||
LaTeX Font Info: External font `cmex10' loaded for size
|
||||
(Font) <8> on input line 81.
|
||||
LaTeX Font Info: External font `cmex10' loaded for size
|
||||
(Font) <6> on input line 81.
|
||||
|
||||
LaTeX Warning: Citation 'app14020744' on page 1 undefined on input line 80.
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -535,6 +611,17 @@ ts/enc/dvips/base/8r.enc}
|
||||
|
||||
]
|
||||
|
||||
LaTeX Warning: Citation 'Wang2024' on page 2 undefined on input line 90.
|
||||
|
||||
|
||||
LaTeX Warning: Citation 'vaswani2023attentionneed' on page 2 undefined on input
|
||||
line 90.
|
||||
|
||||
|
||||
LaTeX Warning: Citation 'Wang2024' on page 2 undefined on input line 92.
|
||||
|
||||
|
||||
|
||||
|
||||
Package fancyhdr Warning: \headheight is too small (12.0pt):
|
||||
(fancyhdr) Make it at least 14.49998pt, for example:
|
||||
@@ -544,6 +631,13 @@ Package fancyhdr Warning: \headheight is too small (12.0pt):
|
||||
|
||||
[2]
|
||||
|
||||
LaTeX Warning: Citation 'ivanov2024' on page 3 undefined on input line 104.
|
||||
|
||||
|
||||
LaTeX Warning: Citation 'ivanov2024' on page 3 undefined on input line 104.
|
||||
|
||||
|
||||
|
||||
|
||||
Package fancyhdr Warning: \headheight is too small (12.0pt):
|
||||
(fancyhdr) Make it at least 14.49998pt, for example:
|
||||
@@ -553,6 +647,15 @@ Package fancyhdr Warning: \headheight is too small (12.0pt):
|
||||
|
||||
[3]
|
||||
|
||||
LaTeX Warning: Citation 'hoffmann2022trainingcomputeoptimallargelanguage' on pa
|
||||
ge 4 undefined on input line 106.
|
||||
|
||||
|
||||
LaTeX Warning: Citation 'hoffmann2022trainingcomputeoptimallargelanguage' on pa
|
||||
ge 4 undefined on input line 106.
|
||||
|
||||
|
||||
|
||||
|
||||
Package fancyhdr Warning: \headheight is too small (12.0pt):
|
||||
(fancyhdr) Make it at least 14.49998pt, for example:
|
||||
@@ -562,6 +665,11 @@ Package fancyhdr Warning: \headheight is too small (12.0pt):
|
||||
|
||||
[4]
|
||||
|
||||
LaTeX Warning: Citation 'hendrycks2021measuringmathematicalproblemsolving' on p
|
||||
age 5 undefined on input line 139.
|
||||
|
||||
|
||||
|
||||
|
||||
Package fancyhdr Warning: \headheight is too small (12.0pt):
|
||||
(fancyhdr) Make it at least 14.49998pt, for example:
|
||||
@@ -570,6 +678,24 @@ Package fancyhdr Warning: \headheight is too small (12.0pt):
|
||||
(fancyhdr) \addtolength{\topmargin}{-2.49998pt}.
|
||||
|
||||
[5]
|
||||
LaTeX Font Info: Trying to load font information for TS1+ptm on input line 1
|
||||
52.
|
||||
(/usr/share/texmf-dist/tex/latex/psnfss/ts1ptm.fd
|
||||
File: ts1ptm.fd 2001/06/04 font definitions for TS1/ptm.
|
||||
)
|
||||
|
||||
LaTeX Warning: Citation 'ahn2024largelanguagemodelsmathematical' on page 6 unde
|
||||
fined on input line 162.
|
||||
|
||||
|
||||
LaTeX Warning: Citation 'cobbe2021trainingverifierssolvemath' on page 6 undefin
|
||||
ed on input line 162.
|
||||
|
||||
|
||||
LaTeX Warning: Citation 'cobbe2021trainingverifierssolvemath' on page 6 undefin
|
||||
ed on input line 162.
|
||||
|
||||
|
||||
|
||||
|
||||
Package fancyhdr Warning: \headheight is too small (12.0pt):
|
||||
@@ -579,6 +705,17 @@ Package fancyhdr Warning: \headheight is too small (12.0pt):
|
||||
(fancyhdr) \addtolength{\topmargin}{-2.49998pt}.
|
||||
|
||||
[6]
|
||||
! Undefined control sequence.
|
||||
l.167 ...le-based function } \mathcal{R} : \mathbb
|
||||
{R}^{n \times d} \rightarr...
|
||||
|
||||
?
|
||||
! Undefined control sequence.
|
||||
l.167 ...athbb{R}^{n \times d} \rightarrow \mathbb
|
||||
{R}^{n \times d} \text{ su...
|
||||
|
||||
?
|
||||
|
||||
|
||||
|
||||
Package fancyhdr Warning: \headheight is too small (12.0pt):
|
||||
@@ -587,35 +724,58 @@ Package fancyhdr Warning: \headheight is too small (12.0pt):
|
||||
(fancyhdr) You might also make \topmargin smaller:
|
||||
(fancyhdr) \addtolength{\topmargin}{-2.49998pt}.
|
||||
|
||||
[7] (./EEMLA.aux)
|
||||
[7]
|
||||
|
||||
LaTeX Warning: Empty bibliography on input line 186.
|
||||
|
||||
|
||||
|
||||
|
||||
Package fancyhdr Warning: \headheight is too small (12.0pt):
|
||||
(fancyhdr) Make it at least 14.49998pt, for example:
|
||||
(fancyhdr) \setlength{\headheight}{14.49998pt}.
|
||||
(fancyhdr) You might also make \topmargin smaller:
|
||||
(fancyhdr) \addtolength{\topmargin}{-2.49998pt}.
|
||||
|
||||
[8] (./EEMLA.aux)
|
||||
***********
|
||||
LaTeX2e <2024-11-01> patch level 2
|
||||
L3 programming layer <2025-01-18>
|
||||
***********
|
||||
|
||||
|
||||
LaTeX Warning: There were undefined references.
|
||||
|
||||
|
||||
Package biblatex Warning: Please (re)run Biber on the file:
|
||||
(biblatex) EEMLA
|
||||
(biblatex) and rerun LaTeX afterwards.
|
||||
|
||||
Package logreq Info: Writing requests to 'EEMLA.run.xml'.
|
||||
\openout1 = `EEMLA.run.xml'.
|
||||
|
||||
)
|
||||
Here is how much of TeX's memory you used:
|
||||
11293 strings out of 475171
|
||||
216231 string characters out of 5767095
|
||||
1100995 words of memory out of 5000000
|
||||
34187 multiletter control sequences out of 15000+600000
|
||||
568981 words of font info for 56 fonts, out of 8000000 for 9000
|
||||
12030 strings out of 475171
|
||||
224647 string characters out of 5767095
|
||||
1112573 words of memory out of 5000000
|
||||
34895 multiletter control sequences out of 15000+600000
|
||||
571519 words of font info for 64 fonts, out of 8000000 for 9000
|
||||
14 hyphenation exceptions out of 8191
|
||||
72i,11n,81p,1320b,2647s stack positions out of 10000i,1000n,20000p,200000b,200000s
|
||||
72i,18n,81p,1320b,2647s stack positions out of 10000i,1000n,20000p,200000b,200000s
|
||||
</usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmbx12.pfb></usr/share/
|
||||
texmf-dist/fonts/type1/public/amsfonts/cm/cmex10.pfb></usr/share/texmf-dist/fon
|
||||
ts/type1/public/amsfonts/cm/cmmi12.pfb></usr/share/texmf-dist/fonts/type1/publi
|
||||
c/amsfonts/cm/cmmi8.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/c
|
||||
mr12.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmr8.pfb></usr/s
|
||||
hare/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy10.pfb></usr/share/texmf-dis
|
||||
t/fonts/type1/urw/times/utmb8a.pfb></usr/share/texmf-dist/fonts/type1/urw/times
|
||||
/utmr8a.pfb></usr/share/texmf-dist/fonts/type1/urw/times/utmri8a.pfb>
|
||||
Output written on EEMLA.pdf (7 pages, 116835 bytes).
|
||||
t/fonts/type1/public/amsfonts/cm/cmsy8.pfb></usr/share/texmf-dist/fonts/type1/u
|
||||
rw/times/utmb8a.pfb></usr/share/texmf-dist/fonts/type1/urw/times/utmr8a.pfb></u
|
||||
sr/share/texmf-dist/fonts/type1/urw/times/utmri8a.pfb>
|
||||
Output written on EEMLA.pdf (8 pages, 131155 bytes).
|
||||
PDF statistics:
|
||||
79 PDF objects out of 1000 (max. 8388607)
|
||||
49 compressed objects within 1 object stream
|
||||
87 PDF objects out of 1000 (max. 8388607)
|
||||
54 compressed objects within 1 object stream
|
||||
0 named destinations out of 1000 (max. 500000)
|
||||
1 words of extra memory for PDF output out of 10000 (max. 10000000)
|
||||
|
||||
|
||||
@@ -41,7 +41,7 @@
|
||||
>
|
||||
]>
|
||||
<requests version="1.0">
|
||||
<internal package="biblatex" priority="9" active="0">
|
||||
<internal package="biblatex" priority="9" active="1">
|
||||
<generic>latex</generic>
|
||||
<provides type="dynamic">
|
||||
<file>EEMLA.bcf</file>
|
||||
@@ -64,7 +64,7 @@
|
||||
<file>english-mla.lbx</file>
|
||||
</requires>
|
||||
</internal>
|
||||
<external package="biblatex" priority="5" active="0">
|
||||
<external package="biblatex" priority="5" active="1">
|
||||
<generic>biber</generic>
|
||||
<cmdline>
|
||||
<binary>biber</binary>
|
||||
|
||||
33
EEMLA.tex
33
EEMLA.tex
@@ -26,6 +26,7 @@
|
||||
%
|
||||
\usepackage{fancyhdr}
|
||||
\usepackage{float}
|
||||
\usepackage{amsmath,amssymb}
|
||||
\pagestyle{fancy}
|
||||
\lhead{}
|
||||
\chead{}
|
||||
@@ -143,6 +144,38 @@ This research aims to investigate the potential integration of rule-based tensor
|
||||
\textbf{RQ:} How can deterministic rule-based tensor mutations be embedded within LLM architectures to enable more accurate and efficient mathematical operations?
|
||||
\end{quote}
|
||||
|
||||
The significance of this line of inquiry lies in its potential to address a fundamental limitation of current generative AI systems like ChatGPT, Anthropic's Claude, etc. While specialized numeric compute systems exist (e.g., RAG with Wolfram Alpha), they operate independently of the SIMD, low-latency systems of LLMs, leading to sizable latency in communication. This is especially prevalent in workflows involving both mathematical and linguistic reasoning. The integration of computational resources required for such workflows within LLMs could substantially reduce the computational resources required for complex tasks that involve both natural language processing and mathematical reasoning.
|
||||
|
||||
This investigation focuses specifically on the following mathematical operations:
|
||||
|
||||
\begin{itemize}
|
||||
\item Basic arithmetic (addition, subtraction, multiplication, division)
|
||||
\item Matrix Operations (multiplication, inversion, determinant)
|
||||
\item Binary Operations (XOR, AND, NAND, left shift, right shift, OR, complement)
|
||||
\item Array Operations (array sum, as well as the mean, median, mode, standard deviation, variance, and other single variable metrics of a data set)
|
||||
\end{itemize}
|
||||
|
||||
Furthermore, as previously mentioned, the scope of the experiment is limited to implementing these operations within existing open-source LLM architectures of moderate scale (1--7 billion parameters) as opposed to developing entirely new architectures. This is both because it is desirable to eliminate all sources of subject variability to help ascertain statistical significance, and because of the readily available weights. Namely, the target model for this paper is the Llama-3-3B model, due to its lightweight and fully open-source nature.
|
||||
|
||||
{\raggedright \normalsize \textbf{Related Works}}
|
||||
|
||||
Prior research has explored various approaches to improving mathematical reasoning capabilities in LLMs, including specialized training on mathematical corpora \parencite[4]{ahn2024largelanguagemodelsmathematical}. Additionally, other work has been done to fine-tune responses to mathematically demanding prompts using reinforcement learning \parencite[1]{cobbe2021trainingverifierssolvemath}. Others still have tried to add secondary inferences or \textit{Verifiers} to determine accuracy of model outputs when containing computations \parencite[2]{cobbe2021trainingverifierssolvemath}. The immediately evident disadvantage to these approaches is the need for extended training cycles and copious amounts of new corpora. Furthermore, training corpora are required to be similar to testing samples since the strategies outlined above fail to grant models a mechanical performance edge.
|
||||
|
||||
{\raggedright \normalsize \textbf{Methodology}}
|
||||
$$
|
||||
\begin{aligned}
|
||||
&\text{Define a rule-based function } \mathcal{R} : \mathbb{R}^{n \times d} \rightarrow \mathbb{R}^{n \times d} \text{ such that:} \\
|
||||
&\mathcal{R}(\mathbf{X})_i =
|
||||
\begin{cases}
|
||||
\mathbf{X}_i + \mathbf{X}_{i+1}, & \text{if rule is ``sum with right neighbor''} \\
|
||||
\det(\mathbf{X}_{i:i+2, j:j+2}), & \text{if rule is ``3$\times$3 determinant over submatrix''} \\
|
||||
\mathbf{X}_i, & \text{otherwise}
|
||||
\end{cases} \\
|
||||
&\text{Then pass } \mathcal{R}(\mathbf{X}) \text{ into the modified attention layer: } \mathbf{Z} = \text{Attention}(\mathcal{R}(\mathbf{X}))
|
||||
\end{aligned}
|
||||
$$
|
||||
|
||||
|
||||
%%%%Works cited
|
||||
\newpage
|
||||
\begin{center}
|
||||
|
||||
@@ -50,3 +50,21 @@ DOI = {10.3390/app14020744}
|
||||
primaryClass={cs.LG},
|
||||
url={https://arxiv.org/abs/2103.03874},
|
||||
}
|
||||
@misc{ahn2024largelanguagemodelsmathematical,
|
||||
title={Large Language Models for Mathematical Reasoning: Progresses and Challenges},
|
||||
author={Janice Ahn and Rishu Verma and Renze Lou and Di Liu and Rui Zhang and Wenpeng Yin},
|
||||
year={2024},
|
||||
eprint={2402.00157},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.CL},
|
||||
url={https://arxiv.org/abs/2402.00157},
|
||||
}
|
||||
@misc{cobbe2021trainingverifierssolvemath,
|
||||
title={Training Verifiers to Solve Math Word Problems},
|
||||
author={Karl Cobbe and Vineet Kosaraju and Mohammad Bavarian and Mark Chen and Heewoo Jun and Lukasz Kaiser and Matthias Plappert and Jerry Tworek and Jacob Hilton and Reiichiro Nakano and Christopher Hesse and John Schulman},
|
||||
year={2021},
|
||||
eprint={2110.14168},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.LG},
|
||||
url={https://arxiv.org/abs/2110.14168},
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user