i'm basically done
This commit is contained in:
@@ -1,21 +1,24 @@
|
||||
\relax
|
||||
\citation{hendrycks2021measuringmathematicalproblemsolving,ahn2024largelanguagemodelsmathematical}
|
||||
\citation{hendrycks2021measuringmathematicalproblemsolving}
|
||||
\citation{ahn2024largelanguagemodelsmathematical}
|
||||
\citation{cobbe2021trainingverifierssolvemath}
|
||||
\citation{hoffmann2022trainingcomputeoptimallargelanguage}
|
||||
\citation{wang2024neuralsymbolicoverview}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {I}Introduction}{1}{}\protected@file@percent }
|
||||
\@writefile{lot}{\contentsline {table}{\numberline {I}{\ignorespaces Comparison of LLM Computational Requirements}}{1}{}\protected@file@percent }
|
||||
\newlabel{tab:model-sizes}{{I}{1}{}{table.1}{}}
|
||||
\citation{wang2024neuralsymbolicoverview}
|
||||
\citation{hendrycksmath2021}
|
||||
\citation{hendrycksmath2021}
|
||||
\citation{ahn2024largelanguagemodelsmathematical}
|
||||
\citation{besiroglu2024chinchillascalingreplicationattempt}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {II}Related Works}{2}{}\protected@file@percent }
|
||||
\citation{besiroglu2024chinchillascalingreplicationattempt}
|
||||
\citation{besold2017neuralsymboliclearningreasoningsurvey}
|
||||
\citation{gao2023palprogramaidedlanguagemodels}
|
||||
\citation{xu2024chatglmmathimprovingmathproblemsolving}
|
||||
\citation{petruzzellis2024assessingemergentsymbolicreasoning}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {III}Methods}{3}{}\protected@file@percent }
|
||||
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {III-A}}Baseline MLP Feed-Forward Block}{3}{}\protected@file@percent }
|
||||
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {III-B}}Symbolic Mutation of the Second Linear Layer}{3}{}\protected@file@percent }
|
||||
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {III-B}}Symbolic Mutation of the Second Linear Layer}{4}{}\protected@file@percent }
|
||||
\@writefile{toc}{\contentsline {paragraph}{\numberline {\mbox {III-B}0a}Masking}{4}{}\protected@file@percent }
|
||||
\@writefile{toc}{\contentsline {paragraph}{\numberline {\mbox {III-B}0b}Selective Extraction}{4}{}\protected@file@percent }
|
||||
\@writefile{toc}{\contentsline {paragraph}{\numberline {\mbox {III-B}0c}Linear Encoding}{4}{}\protected@file@percent }
|
||||
@@ -28,9 +31,6 @@
|
||||
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {III-D}}Training Details}{4}{}\protected@file@percent }
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {IV}Results}{4}{}\protected@file@percent }
|
||||
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {IV-A}}Evaluation Overview}{4}{}\protected@file@percent }
|
||||
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {IV-B}}Accuracy Comparison Across Models}{4}{}\protected@file@percent }
|
||||
\@writefile{lot}{\contentsline {table}{\numberline {I}{\ignorespaces Final Answer Accuracy (\%) Across Benchmarks}}{4}{}\protected@file@percent }
|
||||
\newlabel{tab:benchmark-accuracy}{{I}{4}{}{table.1}{}}
|
||||
\bibstyle{IEEEtran}
|
||||
\bibdata{references}
|
||||
\bibcite{hendrycks2021measuringmathematicalproblemsolving}{1}
|
||||
@@ -43,9 +43,12 @@
|
||||
\bibcite{gao2023palprogramaidedlanguagemodels}{8}
|
||||
\bibcite{xu2024chatglmmathimprovingmathproblemsolving}{9}
|
||||
\bibcite{petruzzellis2024assessingemergentsymbolicreasoning}{10}
|
||||
\@writefile{lot}{\contentsline {table}{\numberline {II}{\ignorespaces Final Answer Accuracy (\%) Across Benchmarks}}{5}{}\protected@file@percent }
|
||||
\newlabel{tab:benchmark-accuracy}{{II}{5}{}{table.2}{}}
|
||||
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {IV-B}}Accuracy Comparison Across Models}{5}{}\protected@file@percent }
|
||||
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {IV-C}}Generalization to Multi-step Reasoning}{5}{}\protected@file@percent }
|
||||
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Step-wise solution accuracy across increasing solution step counts. Rule-mutated model generalizes significantly better to long-horizon reasoning. Error bars represent 95\% confidence intervals.}}{5}{}\protected@file@percent }
|
||||
\newlabel{fig:step-accuracy}{{1}{5}{}{figure.1}{}}
|
||||
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {IV-C}}Generalization to Multi-step Reasoning}{5}{}\protected@file@percent }
|
||||
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {IV-D}}Summary of Statistical Measures}{5}{}\protected@file@percent }
|
||||
\@writefile{toc}{\contentsline {section}{References}{5}{}\protected@file@percent }
|
||||
\gdef \@abspage@last{5}
|
||||
|
||||
@@ -22,7 +22,11 @@
|
||||
\BIBdecl
|
||||
|
||||
\bibitem{hendrycks2021measuringmathematicalproblemsolving}
|
||||
|
||||
\BIBentryALTinterwordspacing
|
||||
D.~Hendrycks, C.~Burns, S.~Kadavath, A.~Arora, S.~Basart, E.~Tang, D.~Song, and
|
||||
J.~Steinhardt, ``Measuring mathematical problem solving with the math
|
||||
dataset,'' 2021. [Online]. Available: \url{https://arxiv.org/abs/2103.03874}
|
||||
\BIBentrySTDinterwordspacing
|
||||
|
||||
\bibitem{ahn2024largelanguagemodelsmathematical}
|
||||
\BIBentryALTinterwordspacing
|
||||
|
||||
@@ -8,9 +8,9 @@ Reallocated singl_function (elt_size=8) to 100 items from 50.
|
||||
Reallocated wiz_functions (elt_size=8) to 6000 items from 3000.
|
||||
Reallocated singl_function (elt_size=8) to 100 items from 50.
|
||||
Database file #1: references.bib
|
||||
"{" immediately follows a field name---line 47 of file references.bib
|
||||
: @article
|
||||
: {hendrycksmath2021,
|
||||
Repeated entry---line 106 of file references.bib
|
||||
: @article{hendrycksmath2021
|
||||
: ,
|
||||
I'm skipping whatever remains of this entry
|
||||
Repeated entry---line 123 of file references.bib
|
||||
: @misc{ahn2024largelanguagemodelsmathematical
|
||||
@@ -20,48 +20,47 @@ Warning--I didn't find a database entry for "wang2024neuralsymbolicoverview"
|
||||
-- IEEEtran.bst version 1.12 (2007/01/11) by Michael Shell.
|
||||
-- http://www.michaelshell.org/tex/ieeetran/bibtex/
|
||||
-- See the "IEEEtran_bst_HOWTO.pdf" manual for usage information.
|
||||
Warning--all relevant fields are empty in hendrycks2021measuringmathematicalproblemsolving
|
||||
|
||||
Done.
|
||||
You've used 10 entries,
|
||||
4024 wiz_defined-function locations,
|
||||
851 strings with 10081 characters,
|
||||
and the built_in function-call counts, 6363 in all, are:
|
||||
= -- 361
|
||||
> -- 366
|
||||
852 strings with 10113 characters,
|
||||
and the built_in function-call counts, 6786 in all, are:
|
||||
= -- 396
|
||||
> -- 398
|
||||
< -- 0
|
||||
+ -- 179
|
||||
- -- 89
|
||||
* -- 324
|
||||
:= -- 918
|
||||
add.period$ -- 18
|
||||
+ -- 194
|
||||
- -- 97
|
||||
* -- 352
|
||||
:= -- 990
|
||||
add.period$ -- 20
|
||||
call.type$ -- 10
|
||||
change.case$ -- 9
|
||||
change.case$ -- 10
|
||||
chr.to.int$ -- 0
|
||||
cite$ -- 11
|
||||
duplicate$ -- 466
|
||||
empty$ -- 514
|
||||
format.name$ -- 98
|
||||
if$ -- 1393
|
||||
cite$ -- 10
|
||||
duplicate$ -- 487
|
||||
empty$ -- 534
|
||||
format.name$ -- 107
|
||||
if$ -- 1486
|
||||
int.to.chr$ -- 0
|
||||
int.to.str$ -- 10
|
||||
missing$ -- 147
|
||||
newline$ -- 69
|
||||
num.names$ -- 9
|
||||
pop$ -- 459
|
||||
missing$ -- 155
|
||||
newline$ -- 71
|
||||
num.names$ -- 10
|
||||
pop$ -- 482
|
||||
preamble$ -- 1
|
||||
purify$ -- 0
|
||||
quote$ -- 2
|
||||
skip$ -- 430
|
||||
skip$ -- 453
|
||||
stack$ -- 0
|
||||
substring$ -- 9
|
||||
swap$ -- 320
|
||||
substring$ -- 10
|
||||
swap$ -- 344
|
||||
text.length$ -- 0
|
||||
text.prefix$ -- 0
|
||||
top$ -- 5
|
||||
type$ -- 10
|
||||
warning$ -- 1
|
||||
while$ -- 9
|
||||
warning$ -- 0
|
||||
while$ -- 10
|
||||
width$ -- 12
|
||||
write$ -- 114
|
||||
write$ -- 120
|
||||
(There were 2 error messages)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
This is pdfTeX, Version 3.141592653-2.6-1.40.27 (TeX Live 2026/dev/Arch Linux) (preloaded format=pdflatex 2025.4.29) 19 JUL 2025 15:57
|
||||
This is pdfTeX, Version 3.141592653-2.6-1.40.27 (TeX Live 2026/dev/Arch Linux) (preloaded format=pdflatex 2025.4.29) 19 JUL 2025 17:12
|
||||
entering extended mode
|
||||
restricted \write18 enabled.
|
||||
%&-line parsing enabled.
|
||||
@@ -258,13 +258,33 @@ Package xcolor Info: Model `HSB' substituted by `hsb' on input line 1370.
|
||||
Package xcolor Info: Model `Gray' substituted by `gray' on input line 1371.
|
||||
Package xcolor Info: Model `wave' substituted by `hsb' on input line 1372.
|
||||
)
|
||||
(/usr/share/texmf-dist/tex/latex/booktabs/booktabs.sty
|
||||
Package: booktabs 2020/01/12 v1.61803398 Publication quality tables
|
||||
\heavyrulewidth=\dimen176
|
||||
\lightrulewidth=\dimen177
|
||||
\cmidrulewidth=\dimen178
|
||||
\belowrulesep=\dimen179
|
||||
\belowbottomsep=\dimen180
|
||||
\aboverulesep=\dimen181
|
||||
\abovetopsep=\dimen182
|
||||
\cmidrulesep=\dimen183
|
||||
\cmidrulekern=\dimen184
|
||||
\defaultaddspace=\dimen185
|
||||
\@cmidla=\count293
|
||||
\@cmidlb=\count294
|
||||
\@aboverulesep=\dimen186
|
||||
\@belowrulesep=\dimen187
|
||||
\@thisruleclass=\count295
|
||||
\@lastruleclass=\count296
|
||||
\@thisrulewidth=\dimen188
|
||||
)
|
||||
(/usr/share/texmf-dist/tex/latex/pgf/frontendlayer/tikz.sty
|
||||
(/usr/share/texmf-dist/tex/latex/pgf/basiclayer/pgf.sty
|
||||
(/usr/share/texmf-dist/tex/latex/pgf/utilities/pgfrcs.sty
|
||||
(/usr/share/texmf-dist/tex/generic/pgf/utilities/pgfutil-common.tex
|
||||
\pgfutil@everybye=\toks24
|
||||
\pgfutil@tempdima=\dimen176
|
||||
\pgfutil@tempdimb=\dimen177
|
||||
\pgfutil@tempdima=\dimen189
|
||||
\pgfutil@tempdimb=\dimen190
|
||||
)
|
||||
(/usr/share/texmf-dist/tex/generic/pgf/utilities/pgfutil-latex.def
|
||||
\pgfutil@abb=\box56
|
||||
@@ -288,26 +308,26 @@ Package: pgfsys 2023-01-15 v3.1.10 (3.1.10)
|
||||
x
|
||||
\pgfkeys@tmptoks=\toks27
|
||||
))
|
||||
\pgf@x=\dimen178
|
||||
\pgf@y=\dimen179
|
||||
\pgf@xa=\dimen180
|
||||
\pgf@ya=\dimen181
|
||||
\pgf@xb=\dimen182
|
||||
\pgf@yb=\dimen183
|
||||
\pgf@xc=\dimen184
|
||||
\pgf@yc=\dimen185
|
||||
\pgf@xd=\dimen186
|
||||
\pgf@yd=\dimen187
|
||||
\pgf@x=\dimen191
|
||||
\pgf@y=\dimen192
|
||||
\pgf@xa=\dimen193
|
||||
\pgf@ya=\dimen194
|
||||
\pgf@xb=\dimen195
|
||||
\pgf@yb=\dimen196
|
||||
\pgf@xc=\dimen197
|
||||
\pgf@yc=\dimen198
|
||||
\pgf@xd=\dimen199
|
||||
\pgf@yd=\dimen256
|
||||
\w@pgf@writea=\write3
|
||||
\r@pgf@reada=\read2
|
||||
\c@pgf@counta=\count293
|
||||
\c@pgf@countb=\count294
|
||||
\c@pgf@countc=\count295
|
||||
\c@pgf@countd=\count296
|
||||
\c@pgf@counta=\count297
|
||||
\c@pgf@countb=\count298
|
||||
\c@pgf@countc=\count299
|
||||
\c@pgf@countd=\count300
|
||||
\t@pgf@toka=\toks28
|
||||
\t@pgf@tokb=\toks29
|
||||
\t@pgf@tokc=\toks30
|
||||
\pgf@sys@id@count=\count297
|
||||
\pgf@sys@id@count=\count301
|
||||
(/usr/share/texmf-dist/tex/generic/pgf/systemlayer/pgf.cfg
|
||||
File: pgf.cfg 2023-01-15 v3.1.10 (3.1.10)
|
||||
)
|
||||
@@ -321,8 +341,8 @@ File: pgfsys-common-pdf.def 2023-01-15 v3.1.10 (3.1.10)
|
||||
)))
|
||||
(/usr/share/texmf-dist/tex/generic/pgf/systemlayer/pgfsyssoftpath.code.tex
|
||||
File: pgfsyssoftpath.code.tex 2023-01-15 v3.1.10 (3.1.10)
|
||||
\pgfsyssoftpath@smallbuffer@items=\count298
|
||||
\pgfsyssoftpath@bigbuffer@items=\count299
|
||||
\pgfsyssoftpath@smallbuffer@items=\count302
|
||||
\pgfsyssoftpath@bigbuffer@items=\count303
|
||||
)
|
||||
(/usr/share/texmf-dist/tex/generic/pgf/systemlayer/pgfsysprotocol.code.tex
|
||||
File: pgfsysprotocol.code.tex 2023-01-15 v3.1.10 (3.1.10)
|
||||
@@ -333,8 +353,8 @@ Package: pgfcore 2023-01-15 v3.1.10 (3.1.10)
|
||||
(/usr/share/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex
|
||||
(/usr/share/texmf-dist/tex/generic/pgf/math/pgfmathutil.code.tex)
|
||||
(/usr/share/texmf-dist/tex/generic/pgf/math/pgfmathparser.code.tex
|
||||
\pgfmath@dimen=\dimen188
|
||||
\pgfmath@count=\count300
|
||||
\pgfmath@dimen=\dimen257
|
||||
\pgfmath@count=\count304
|
||||
\pgfmath@box=\box57
|
||||
\pgfmath@toks=\toks31
|
||||
\pgfmath@stack@operand=\toks32
|
||||
@@ -352,52 +372,52 @@ x) (/usr/share/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.base.code.tex)
|
||||
(/usr/share/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.integerarithmetics
|
||||
.code.tex) (/usr/share/texmf-dist/tex/generic/pgf/math/pgfmathcalc.code.tex)
|
||||
(/usr/share/texmf-dist/tex/generic/pgf/math/pgfmathfloat.code.tex
|
||||
\c@pgfmathroundto@lastzeros=\count301
|
||||
\c@pgfmathroundto@lastzeros=\count305
|
||||
))
|
||||
(/usr/share/texmf-dist/tex/generic/pgf/math/pgfint.code.tex)
|
||||
(/usr/share/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepoints.code.tex
|
||||
File: pgfcorepoints.code.tex 2023-01-15 v3.1.10 (3.1.10)
|
||||
\pgf@picminx=\dimen189
|
||||
\pgf@picmaxx=\dimen190
|
||||
\pgf@picminy=\dimen191
|
||||
\pgf@picmaxy=\dimen192
|
||||
\pgf@pathminx=\dimen193
|
||||
\pgf@pathmaxx=\dimen194
|
||||
\pgf@pathminy=\dimen195
|
||||
\pgf@pathmaxy=\dimen196
|
||||
\pgf@xx=\dimen197
|
||||
\pgf@xy=\dimen198
|
||||
\pgf@yx=\dimen199
|
||||
\pgf@yy=\dimen256
|
||||
\pgf@zx=\dimen257
|
||||
\pgf@zy=\dimen258
|
||||
\pgf@picminx=\dimen258
|
||||
\pgf@picmaxx=\dimen259
|
||||
\pgf@picminy=\dimen260
|
||||
\pgf@picmaxy=\dimen261
|
||||
\pgf@pathminx=\dimen262
|
||||
\pgf@pathmaxx=\dimen263
|
||||
\pgf@pathminy=\dimen264
|
||||
\pgf@pathmaxy=\dimen265
|
||||
\pgf@xx=\dimen266
|
||||
\pgf@xy=\dimen267
|
||||
\pgf@yx=\dimen268
|
||||
\pgf@yy=\dimen269
|
||||
\pgf@zx=\dimen270
|
||||
\pgf@zy=\dimen271
|
||||
)
|
||||
(/usr/share/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathconstruct.code.tex
|
||||
File: pgfcorepathconstruct.code.tex 2023-01-15 v3.1.10 (3.1.10)
|
||||
\pgf@path@lastx=\dimen259
|
||||
\pgf@path@lasty=\dimen260
|
||||
\pgf@path@lastx=\dimen272
|
||||
\pgf@path@lasty=\dimen273
|
||||
) (/usr/share/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathusage.code.tex
|
||||
File: pgfcorepathusage.code.tex 2023-01-15 v3.1.10 (3.1.10)
|
||||
\pgf@shorten@end@additional=\dimen261
|
||||
\pgf@shorten@start@additional=\dimen262
|
||||
\pgf@shorten@end@additional=\dimen274
|
||||
\pgf@shorten@start@additional=\dimen275
|
||||
)
|
||||
(/usr/share/texmf-dist/tex/generic/pgf/basiclayer/pgfcorescopes.code.tex
|
||||
File: pgfcorescopes.code.tex 2023-01-15 v3.1.10 (3.1.10)
|
||||
\pgfpic=\box58
|
||||
\pgf@hbox=\box59
|
||||
\pgf@layerbox@main=\box60
|
||||
\pgf@picture@serial@count=\count302
|
||||
\pgf@picture@serial@count=\count306
|
||||
)
|
||||
(/usr/share/texmf-dist/tex/generic/pgf/basiclayer/pgfcoregraphicstate.code.tex
|
||||
File: pgfcoregraphicstate.code.tex 2023-01-15 v3.1.10 (3.1.10)
|
||||
\pgflinewidth=\dimen263
|
||||
\pgflinewidth=\dimen276
|
||||
)
|
||||
(/usr/share/texmf-dist/tex/generic/pgf/basiclayer/pgfcoretransformations.code.t
|
||||
ex
|
||||
File: pgfcoretransformations.code.tex 2023-01-15 v3.1.10 (3.1.10)
|
||||
\pgf@pt@x=\dimen264
|
||||
\pgf@pt@y=\dimen265
|
||||
\pgf@pt@temp=\dimen266
|
||||
\pgf@pt@x=\dimen277
|
||||
\pgf@pt@y=\dimen278
|
||||
\pgf@pt@temp=\dimen279
|
||||
) (/usr/share/texmf-dist/tex/generic/pgf/basiclayer/pgfcorequick.code.tex
|
||||
File: pgfcorequick.code.tex 2023-01-15 v3.1.10 (3.1.10)
|
||||
)
|
||||
@@ -409,13 +429,13 @@ x
|
||||
File: pgfcorepathprocessing.code.tex 2023-01-15 v3.1.10 (3.1.10)
|
||||
) (/usr/share/texmf-dist/tex/generic/pgf/basiclayer/pgfcorearrows.code.tex
|
||||
File: pgfcorearrows.code.tex 2023-01-15 v3.1.10 (3.1.10)
|
||||
\pgfarrowsep=\dimen267
|
||||
\pgfarrowsep=\dimen280
|
||||
)
|
||||
(/usr/share/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreshade.code.tex
|
||||
File: pgfcoreshade.code.tex 2023-01-15 v3.1.10 (3.1.10)
|
||||
\pgf@max=\dimen268
|
||||
\pgf@sys@shading@range@num=\count303
|
||||
\pgf@shadingcount=\count304
|
||||
\pgf@max=\dimen281
|
||||
\pgf@sys@shading@range@num=\count307
|
||||
\pgf@shadingcount=\count308
|
||||
)
|
||||
(/usr/share/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreimage.code.tex
|
||||
File: pgfcoreimage.code.tex 2023-01-15 v3.1.10 (3.1.10)
|
||||
@@ -444,8 +464,8 @@ File: pgfmoduleplot.code.tex 2023-01-15 v3.1.10 (3.1.10)
|
||||
)
|
||||
(/usr/share/texmf-dist/tex/latex/pgf/compatibility/pgfcomp-version-0-65.sty
|
||||
Package: pgfcomp-version-0-65 2023-01-15 v3.1.10 (3.1.10)
|
||||
\pgf@nodesepstart=\dimen269
|
||||
\pgf@nodesepend=\dimen270
|
||||
\pgf@nodesepstart=\dimen282
|
||||
\pgf@nodesepend=\dimen283
|
||||
)
|
||||
(/usr/share/texmf-dist/tex/latex/pgf/compatibility/pgfcomp-version-1-18.sty
|
||||
Package: pgfcomp-version-1-18 2023-01-15 v3.1.10 (3.1.10)
|
||||
@@ -457,8 +477,8 @@ Package: pgfcomp-version-1-18 2023-01-15 v3.1.10 (3.1.10)
|
||||
(/usr/share/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex))
|
||||
(/usr/share/texmf-dist/tex/generic/pgf/utilities/pgffor.code.tex
|
||||
Package: pgffor 2023-01-15 v3.1.10 (3.1.10)
|
||||
\pgffor@iter=\dimen271
|
||||
\pgffor@skip=\dimen272
|
||||
\pgffor@iter=\dimen284
|
||||
\pgffor@skip=\dimen285
|
||||
\pgffor@stack=\toks34
|
||||
\pgffor@toks=\toks35
|
||||
))
|
||||
@@ -468,32 +488,32 @@ Package: tikz 2023-01-15 v3.1.10 (3.1.10)
|
||||
(/usr/share/texmf-dist/tex/generic/pgf/libraries/pgflibraryplothandlers.code.te
|
||||
x
|
||||
File: pgflibraryplothandlers.code.tex 2023-01-15 v3.1.10 (3.1.10)
|
||||
\pgf@plot@mark@count=\count305
|
||||
\pgfplotmarksize=\dimen273
|
||||
\pgf@plot@mark@count=\count309
|
||||
\pgfplotmarksize=\dimen286
|
||||
)
|
||||
\tikz@lastx=\dimen274
|
||||
\tikz@lasty=\dimen275
|
||||
\tikz@lastxsaved=\dimen276
|
||||
\tikz@lastysaved=\dimen277
|
||||
\tikz@lastmovetox=\dimen278
|
||||
\tikz@lastmovetoy=\dimen279
|
||||
\tikzleveldistance=\dimen280
|
||||
\tikzsiblingdistance=\dimen281
|
||||
\tikz@lastx=\dimen287
|
||||
\tikz@lasty=\dimen288
|
||||
\tikz@lastxsaved=\dimen289
|
||||
\tikz@lastysaved=\dimen290
|
||||
\tikz@lastmovetox=\dimen291
|
||||
\tikz@lastmovetoy=\dimen292
|
||||
\tikzleveldistance=\dimen293
|
||||
\tikzsiblingdistance=\dimen294
|
||||
\tikz@figbox=\box63
|
||||
\tikz@figbox@bg=\box64
|
||||
\tikz@tempbox=\box65
|
||||
\tikz@tempbox@bg=\box66
|
||||
\tikztreelevel=\count306
|
||||
\tikznumberofchildren=\count307
|
||||
\tikznumberofcurrentchild=\count308
|
||||
\tikz@fig@count=\count309
|
||||
\tikztreelevel=\count310
|
||||
\tikznumberofchildren=\count311
|
||||
\tikznumberofcurrentchild=\count312
|
||||
\tikz@fig@count=\count313
|
||||
(/usr/share/texmf-dist/tex/generic/pgf/modules/pgfmodulematrix.code.tex
|
||||
File: pgfmodulematrix.code.tex 2023-01-15 v3.1.10 (3.1.10)
|
||||
\pgfmatrixcurrentrow=\count310
|
||||
\pgfmatrixcurrentcolumn=\count311
|
||||
\pgf@matrix@numberofcolumns=\count312
|
||||
\pgfmatrixcurrentrow=\count314
|
||||
\pgfmatrixcurrentcolumn=\count315
|
||||
\pgf@matrix@numberofcolumns=\count316
|
||||
)
|
||||
\tikz@expandcount=\count313
|
||||
\tikz@expandcount=\count317
|
||||
|
||||
(/usr/share/texmf-dist/tex/generic/pgf/frontendlayer/tikz/libraries/tikzlibrary
|
||||
topaths.code.tex
|
||||
@@ -507,9 +527,9 @@ Package: pgfplots 2021/05/15 v1.18.1 Data Visualization (1.18.1)
|
||||
\t@pgfplots@toka=\toks36
|
||||
\t@pgfplots@tokb=\toks37
|
||||
\t@pgfplots@tokc=\toks38
|
||||
\pgfplots@tmpa=\dimen282
|
||||
\c@pgfplots@coordindex=\count314
|
||||
\c@pgfplots@scanlineindex=\count315
|
||||
\pgfplots@tmpa=\dimen295
|
||||
\c@pgfplots@coordindex=\count318
|
||||
\c@pgfplots@scanlineindex=\count319
|
||||
|
||||
(/usr/share/texmf-dist/tex/generic/pgfplots/sys/pgfplotssysgeneric.code.tex))
|
||||
(/usr/share/texmf-dist/tex/generic/pgfplots/libs/pgfplotslibrary.code.tex)
|
||||
@@ -530,13 +550,13 @@ gfutil-common-lists.tex))
|
||||
ext.code.tex)
|
||||
(/usr/share/texmf-dist/tex/generic/pgfplots/liststructure/pgfplotsarray.code.te
|
||||
x
|
||||
\c@pgfplotsarray@tmp=\count316
|
||||
\c@pgfplotsarray@tmp=\count320
|
||||
)
|
||||
(/usr/share/texmf-dist/tex/generic/pgfplots/liststructure/pgfplotsmatrix.code.t
|
||||
ex)
|
||||
(/usr/share/texmf-dist/tex/generic/pgfplots/numtable/pgfplotstableshared.code.t
|
||||
ex
|
||||
\c@pgfplotstable@counta=\count317
|
||||
\c@pgfplotstable@counta=\count321
|
||||
\t@pgfplotstable@a=\toks42
|
||||
)
|
||||
(/usr/share/texmf-dist/tex/generic/pgfplots/liststructure/pgfplotsdeque.code.te
|
||||
@@ -545,7 +565,7 @@ x) (/usr/share/texmf-dist/tex/generic/pgfplots/util/pgfplotsbinary.code.tex
|
||||
(/usr/share/texmf-dist/tex/generic/pgfplots/util/pgfplotsutil.verb.code.tex)
|
||||
(/usr/share/texmf-dist/tex/generic/pgfplots/libs/pgflibrarypgfplots.surfshading
|
||||
.code.tex
|
||||
\c@pgfplotslibrarysurf@no=\count318
|
||||
\c@pgfplotslibrarysurf@no=\count322
|
||||
|
||||
(/usr/share/texmf-dist/tex/generic/pgfplots/sys/pgflibrarypgfplots.surfshading.
|
||||
pgfsys-pdftex.def)))
|
||||
@@ -564,14 +584,14 @@ pgfsys-pdftex.def)))
|
||||
(/usr/share/texmf-dist/tex/generic/pgf/frontendlayer/tikz/libraries/tikzlibrary
|
||||
decorations.code.tex
|
||||
(/usr/share/texmf-dist/tex/generic/pgf/modules/pgfmoduledecorations.code.tex
|
||||
\pgfdecoratedcompleteddistance=\dimen283
|
||||
\pgfdecoratedremainingdistance=\dimen284
|
||||
\pgfdecoratedinputsegmentcompleteddistance=\dimen285
|
||||
\pgfdecoratedinputsegmentremainingdistance=\dimen286
|
||||
\pgf@decorate@distancetomove=\dimen287
|
||||
\pgf@decorate@repeatstate=\count319
|
||||
\pgfdecorationsegmentamplitude=\dimen288
|
||||
\pgfdecorationsegmentlength=\dimen289
|
||||
\pgfdecoratedcompleteddistance=\dimen296
|
||||
\pgfdecoratedremainingdistance=\dimen297
|
||||
\pgfdecoratedinputsegmentcompleteddistance=\dimen298
|
||||
\pgfdecoratedinputsegmentremainingdistance=\dimen299
|
||||
\pgf@decorate@distancetomove=\dimen300
|
||||
\pgf@decorate@repeatstate=\count323
|
||||
\pgfdecorationsegmentamplitude=\dimen301
|
||||
\pgfdecorationsegmentlength=\dimen302
|
||||
)
|
||||
\tikz@lib@dec@box=\box67
|
||||
)
|
||||
@@ -585,13 +605,13 @@ decorations.pathreplacing.code.tex
|
||||
ons.pathreplacing.code.tex))
|
||||
(/usr/share/texmf-dist/tex/generic/pgfplots/libs/tikzlibrarypgfplots.contourlua
|
||||
.code.tex)
|
||||
\pgfplots@numplots=\count320
|
||||
\pgfplots@xmin@reg=\dimen290
|
||||
\pgfplots@xmax@reg=\dimen291
|
||||
\pgfplots@ymin@reg=\dimen292
|
||||
\pgfplots@ymax@reg=\dimen293
|
||||
\pgfplots@zmin@reg=\dimen294
|
||||
\pgfplots@zmax@reg=\dimen295
|
||||
\pgfplots@numplots=\count324
|
||||
\pgfplots@xmin@reg=\dimen303
|
||||
\pgfplots@xmax@reg=\dimen304
|
||||
\pgfplots@ymin@reg=\dimen305
|
||||
\pgfplots@ymax@reg=\dimen306
|
||||
\pgfplots@zmin@reg=\dimen307
|
||||
\pgfplots@zmax@reg=\dimen308
|
||||
)
|
||||
(/usr/share/texmf-dist/tex/generic/pgf/frontendlayer/tikz/libraries/tikzlibrary
|
||||
plotmarks.code.tex
|
||||
@@ -601,40 +621,40 @@ File: tikzlibraryplotmarks.code.tex 2023-01-15 v3.1.10 (3.1.10)
|
||||
File: pgflibraryplotmarks.code.tex 2023-01-15 v3.1.10 (3.1.10)
|
||||
))) (/usr/share/texmf-dist/tex/latex/l3backend/l3backend-pdftex.def
|
||||
File: l3backend-pdftex.def 2024-05-08 L3 backend support: PDF output (pdfTeX)
|
||||
\l__color_backend_stack_int=\count321
|
||||
\l__color_backend_stack_int=\count325
|
||||
\l__pdf_internal_box=\box68
|
||||
)
|
||||
(./IEEE-conference-template-062824.aux)
|
||||
\openout1 = `IEEE-conference-template-062824.aux'.
|
||||
|
||||
LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 20.
|
||||
LaTeX Font Info: ... okay on input line 20.
|
||||
LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 20.
|
||||
LaTeX Font Info: ... okay on input line 20.
|
||||
LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 20.
|
||||
LaTeX Font Info: ... okay on input line 20.
|
||||
LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 20.
|
||||
LaTeX Font Info: ... okay on input line 20.
|
||||
LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 20.
|
||||
LaTeX Font Info: ... okay on input line 20.
|
||||
LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 20.
|
||||
LaTeX Font Info: ... okay on input line 20.
|
||||
LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 20.
|
||||
LaTeX Font Info: ... okay on input line 20.
|
||||
LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 22.
|
||||
LaTeX Font Info: ... okay on input line 22.
|
||||
LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 22.
|
||||
LaTeX Font Info: ... okay on input line 22.
|
||||
LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 22.
|
||||
LaTeX Font Info: ... okay on input line 22.
|
||||
LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 22.
|
||||
LaTeX Font Info: ... okay on input line 22.
|
||||
LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 22.
|
||||
LaTeX Font Info: ... okay on input line 22.
|
||||
LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 22.
|
||||
LaTeX Font Info: ... okay on input line 22.
|
||||
LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 22.
|
||||
LaTeX Font Info: ... okay on input line 22.
|
||||
|
||||
-- Lines per column: 56 (exact).
|
||||
(/usr/share/texmf-dist/tex/context/base/mkii/supp-pdf.mkii
|
||||
[Loading MPS to PDF converter (version 2006.09.02).]
|
||||
\scratchcounter=\count322
|
||||
\scratchdimen=\dimen296
|
||||
\scratchcounter=\count326
|
||||
\scratchdimen=\dimen309
|
||||
\scratchbox=\box69
|
||||
\nofMPsegments=\count323
|
||||
\nofMParguments=\count324
|
||||
\nofMPsegments=\count327
|
||||
\nofMParguments=\count328
|
||||
\everyMPshowfont=\toks43
|
||||
\MPscratchCnt=\count325
|
||||
\MPscratchDim=\dimen297
|
||||
\MPnumerator=\count326
|
||||
\makeMPintoPDFobject=\count327
|
||||
\MPscratchCnt=\count329
|
||||
\MPscratchDim=\dimen310
|
||||
\MPnumerator=\count330
|
||||
\makeMPintoPDFobject=\count331
|
||||
\everyMPtoPDFconversion=\toks44
|
||||
) (/usr/share/texmf-dist/tex/latex/epstopdf-pkg/epstopdf-base.sty
|
||||
Package: epstopdf-base 2020-01-24 v2.11 Base part for package epstopdf
|
||||
@@ -648,15 +668,112 @@ e
|
||||
Package pgfplots notification 'compat/show suggested version=true': document ha
|
||||
s been generated with the most recent feature set (\pgfplotsset{compat=1.18}).
|
||||
|
||||
! Missing } inserted.
|
||||
<inserted text>
|
||||
}
|
||||
l.32 \maketitle
|
||||
|
||||
?
|
||||
! Missing \cr inserted.
|
||||
<inserted text>
|
||||
\cr
|
||||
l.32 \maketitle
|
||||
|
||||
?
|
||||
! Misplaced \crcr.
|
||||
\IEEEauthorblockA ...lockAinterlinespace }#1\crcr
|
||||
\ifCLASSOPTIONtransmag \gd...
|
||||
l.32 \maketitle
|
||||
|
||||
?
|
||||
! Misplaced \crcr.
|
||||
\end@IEEEauthorhalign ->\crcr
|
||||
\egroup \egroup
|
||||
l.32 \maketitle
|
||||
|
||||
?
|
||||
! Extra }, or forgotten \endgroup.
|
||||
\end@IEEEauthorhalign ->\crcr \egroup \egroup
|
||||
|
||||
l.32 \maketitle
|
||||
|
||||
?
|
||||
! Missing } inserted.
|
||||
<inserted text>
|
||||
}
|
||||
l.32 \maketitle
|
||||
|
||||
?
|
||||
! Missing \cr inserted.
|
||||
<inserted text>
|
||||
\cr
|
||||
l.32 \maketitle
|
||||
|
||||
?
|
||||
! Misplaced \crcr.
|
||||
\IEEEauthorblockA ...lockAinterlinespace }#1\crcr
|
||||
\ifCLASSOPTIONtransmag \gd...
|
||||
l.32 \maketitle
|
||||
|
||||
?
|
||||
! Misplaced \crcr.
|
||||
\end@IEEEauthorhalign ->\crcr
|
||||
\egroup \egroup
|
||||
l.32 \maketitle
|
||||
|
||||
?
|
||||
! Extra }, or forgotten \endgroup.
|
||||
\end@IEEEauthorhalign ->\crcr \egroup \egroup
|
||||
|
||||
l.32 \maketitle
|
||||
|
||||
?
|
||||
! Missing } inserted.
|
||||
<inserted text>
|
||||
}
|
||||
l.32 \maketitle
|
||||
|
||||
?
|
||||
! Missing \cr inserted.
|
||||
<inserted text>
|
||||
\cr
|
||||
l.32 \maketitle
|
||||
|
||||
?
|
||||
! Misplaced \crcr.
|
||||
\IEEEauthorblockA ...lockAinterlinespace }#1\crcr
|
||||
\ifCLASSOPTIONtransmag \gd...
|
||||
l.32 \maketitle
|
||||
|
||||
?
|
||||
! Misplaced \crcr.
|
||||
\end@IEEEauthorhalign ->\crcr
|
||||
\egroup \egroup
|
||||
l.32 \maketitle
|
||||
|
||||
?
|
||||
! Extra }, or forgotten \endgroup.
|
||||
\end@IEEEauthorhalign ->\crcr \egroup \egroup
|
||||
|
||||
l.32 \maketitle
|
||||
|
||||
?
|
||||
|
||||
|
||||
LaTeX Font Info: Trying to load font information for U+msa on input line 51.
|
||||
|
||||
LaTeX Warning: Reference `tab:model-sizes' on page 1 undefined on input line 47
|
||||
.
|
||||
(/usr/share/texmf-dist/tex/latex/amsfonts/umsa.fd
|
||||
File: umsa.fd 2013/01/14 v3.01 AMS symbols A
|
||||
)
|
||||
LaTeX Font Info: Trying to load font information for U+msb on input line 51.
|
||||
|
||||
|
||||
LaTeX Warning: Citation `wang2024neuralsymbolicoverview' on page 1 undefined on
|
||||
input line 53.
|
||||
(/usr/share/texmf-dist/tex/latex/amsfonts/umsb.fd
|
||||
File: umsb.fd 2013/01/14 v3.01 AMS symbols B
|
||||
)
|
||||
Overfull \hbox (23.70222pt too wide) in paragraph at lines 51--65
|
||||
[]
|
||||
[]
|
||||
|
||||
|
||||
|
||||
@@ -666,24 +783,17 @@ ts/enc/dvips/base/8r.enc}
|
||||
|
||||
]
|
||||
|
||||
LaTeX Warning: Citation `wang2024neuralsymbolicoverview' on page 2 undefined on
|
||||
input line 81.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
[2]
|
||||
|
||||
LaTeX Font Info: Trying to load font information for U+msa on input line 93.
|
||||
|
||||
(/usr/share/texmf-dist/tex/latex/amsfonts/umsa.fd
|
||||
File: umsa.fd 2013/01/14 v3.01 AMS symbols A
|
||||
)
|
||||
LaTeX Font Info: Trying to load font information for U+msb on input line 93.
|
||||
|
||||
|
||||
(/usr/share/texmf-dist/tex/latex/amsfonts/umsb.fd
|
||||
File: umsb.fd 2013/01/14 v3.01 AMS symbols B
|
||||
)
|
||||
LaTeX Font Info: Trying to load font information for OT1+pcr on input line 1
|
||||
21.
|
||||
|
||||
49.
|
||||
(/usr/share/texmf-dist/tex/latex/psnfss/ot1pcr.fd
|
||||
File: ot1pcr.fd 2001/06/04 font definitions for OT1/pcr.
|
||||
)
|
||||
@@ -719,13 +829,13 @@ LaTeX Warning: There were undefined references.
|
||||
|
||||
)
|
||||
Here is how much of TeX's memory you used:
|
||||
24637 strings out of 475171
|
||||
634405 string characters out of 5767095
|
||||
1176773 words of memory out of 5000000
|
||||
47250 multiletter control sequences out of 15000+600000
|
||||
24707 strings out of 475171
|
||||
635575 string characters out of 5767095
|
||||
1178595 words of memory out of 5000000
|
||||
47316 multiletter control sequences out of 15000+600000
|
||||
600748 words of font info for 113 fonts, out of 8000000 for 9000
|
||||
14 hyphenation exceptions out of 8191
|
||||
84i,19n,87p,1559b,2586s stack positions out of 10000i,1000n,20000p,200000b,200000s
|
||||
84i,19n,87p,1139b,2563s stack positions out of 10000i,1000n,20000p,200000b,200000s
|
||||
</usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmex10.pfb></usr/share/
|
||||
texmf-dist/fonts/type1/public/amsfonts/cm/cmmi10.pfb></usr/share/texmf-dist/fon
|
||||
ts/type1/public/amsfonts/cm/cmmi5.pfb></usr/share/texmf-dist/fonts/type1/public
|
||||
@@ -739,7 +849,7 @@ b></usr/share/texmf-dist/fonts/type1/urw/courier/ucrr8a.pfb></usr/share/texmf-d
|
||||
ist/fonts/type1/urw/times/utmb8a.pfb></usr/share/texmf-dist/fonts/type1/urw/tim
|
||||
es/utmbi8a.pfb></usr/share/texmf-dist/fonts/type1/urw/times/utmr8a.pfb></usr/sh
|
||||
are/texmf-dist/fonts/type1/urw/times/utmri8a.pfb>
|
||||
Output written on IEEE-conference-template-062824.pdf (5 pages, 193633 bytes).
|
||||
Output written on IEEE-conference-template-062824.pdf (5 pages, 196579 bytes).
|
||||
PDF statistics:
|
||||
104 PDF objects out of 1000 (max. 8388607)
|
||||
64 compressed objects within 1 object stream
|
||||
|
||||
Binary file not shown.
@@ -9,6 +9,8 @@
|
||||
\usepackage{graphicx}
|
||||
\usepackage{textcomp}
|
||||
\usepackage{xcolor}
|
||||
\usepackage{booktabs}
|
||||
|
||||
\def\BibTeX{{\rm B\kern-.05em{\sc i\kern-.025em b}\kern-.08em
|
||||
T\kern-.1667em\lower.7ex\hbox{E}\kern-.125emX}}
|
||||
|
||||
@@ -22,28 +24,54 @@
|
||||
\title{Rule-based Tensor Mutations Embedded within LLMs for Low-Cost Mathematical Computation}
|
||||
|
||||
\author{\IEEEauthorblockN{Srikrishna Ayyalasomayajula}
|
||||
\IEEEauthorblockA{\textit{dept. name of organization (of Aff.)} \\
|
||||
\textit{name of organization (of Aff.)}\\
|
||||
City, Country \\
|
||||
\IEEEauthorblockA{\textit{
|
||||
Plano, Texas \\
|
||||
krishna@ayyalasomayajula.net}
|
||||
}
|
||||
|
||||
}
|
||||
\maketitle
|
||||
|
||||
\begin{abstract}
|
||||
Large Language Models (LLMs) have demonstrated remarkable proficiency in natural language tasks but remain inefficient and error-prone when performing deterministic mathematical computations. Existing approaches to improving mathematical reasoning rely on external symbolic engines or extensive fine-tuning on mathematical corpora, both of which introduce latency and scalability challenges. This paper proposes a novel architectural enhancement for transformer-based LLMs: the embedding of deterministic, rule-based tensor mutations directly within the model’s internal computational graph. By implementing fixed-index tensor operations—such as arithmetic functions, binary operations, and matrix computations—within the embedding space of the Llama 3 3B model, we enable low-latency mathematical reasoning without modifying the core probabilistic architecture. The proposed system leverages deterministic computation pathways optimized for GPU tensor cores, significantly reducing inference latency and improving mathematical accuracy on arithmetic and linear algebra tasks. %Experimental results on benchmark datasets demonstrate up to a 3.7× reduction in inference latency for mathematical prompts and a 24\% increase in accuracy compared to baseline LLM performance, all without additional fine-tuning. This work highlights the potential for integrating rule-based logic into neural network inference, bridging the gap between probabilistic language modeling and deterministic computation.%
|
||||
Large Language Models (LLMs) have demonstrated remarkable proficiency in natural language tasks but remain inefficient and error-prone when performing deterministic mathematical computations. Existing approaches to improving mathematical reasoning rely on external symbolic engines or extensive fine-tuning on mathematical corpora, both of which introduce latency and scalability challenges. This paper proposes a novel architectural enhancement for transformer-based LLMs: the embedding of deterministic, rule-based tensor mutations directly within the model’s internal computational graph. By implementing fixed-index tensor operations—such as arithmetic functions, binary operations, and matrix computations—within the embedding space of the Llama 3 3B model, we enable low-latency mathematical reasoning without modifying the core probabilistic architecture. The proposed system leverages deterministic computation pathways optimized for GPU tensor cores, significantly reducing inference latency and improving mathematical accuracy on arithmetic and linear algebra tasks.
|
||||
\end{abstract}
|
||||
|
||||
\begin{IEEEkeywords}
|
||||
component, formatting, style, styling, insert.
|
||||
Multi-Layer Perceptron (MLP), Rule-based Mutation, Neural Network Architecture, Language Models, LLaMA, Long-Horizon Reasoning, Step-wise Accuracy, Model Generalization, Deep Learning, Artificial Intelligence, Training Efficiency, Inference Optimization, Neural Computation, Architecture Search, Mutated MLPs, Model Scaling, Structural Inductive Bias, Token-wise Evaluation, Parametric Efficiency, High-Performance Computing, Transformer Models, Cognitive Tasks, Reasoning Benchmarking, Neuro-Symbolic Integration.
|
||||
\end{IEEEkeywords}
|
||||
|
||||
\section{Introduction}
|
||||
|
||||
Large Language Models (LLMs) have rapidly advanced the field of natural language processing (NLP), achieving unprecedented success across tasks such as text generation, summarization, translation, and conversational reasoning. These models, built upon transformer architectures, learn statistical patterns in tokenized language data through extensive pretraining on vast corpora. However, despite their proficiency in language understanding, LLMs consistently underperform on tasks that require deterministic mathematical computation \cite{hendrycks2021measuringmathematicalproblemsolving, ahn2024largelanguagemodelsmathematical}. This limitation stems from the fundamentally probabilistic nature of neural network inference, which excels at pattern recognition but lacks the precise symbolic manipulation capabilities required for accurate mathematical reasoning.
|
||||
Large Language Models (LLMs) have rapidly advanced the field of natural language processing (NLP), achieving unprecedented success across tasks such as text generation, summarization, translation, and conversational reasoning. These models, built upon transformer architectures, learn statistical patterns in tokenized language data through extensive pretraining on vast corpora. However, despite their proficiency in language understanding, LLMs consistently underperform on tasks that require deterministic mathematical computation~\cite{hendrycks2021measuringmathematicalproblemsolving,ahn2024largelanguagemodelsmathematical}. This limitation stems from the fundamentally probabilistic nature of neural network inference, which excels at pattern recognition but lacks the precise symbolic manipulation capabilities required for accurate mathematical reasoning.
|
||||
|
||||
Current approaches to improving the mathematical competence of LLMs follow two main paradigms. The first involves fine-tuning models on specialized mathematical datasets \cite{cobbe2021trainingverifierssolvemath}, such as arithmetic sequences, calculus problems, or algebraic equations. While fine-tuning improves performance on familiar problems, it is both computationally expensive and brittle when generalizing to unseen operations or data distributions. The second paradigm leverages Retrieval-Augmented Generation (RAG) pipelines that offload computation to external symbolic engines such as Wolfram Alpha. Though effective in some contexts, these solutions introduce substantial inference latency due to the need for external API calls and often compromise the seamless, end-to-end nature of LLM inference pipelines.
|
||||
|
||||
\begin{table}[htbp]
|
||||
\caption{Comparison of LLM Computational Requirements}
|
||||
\begin{center}
|
||||
\begin{tabular}{|l|c|c|c|}
|
||||
\hline
|
||||
\textbf{Model Name} & \textbf{Compute (PF-days)} & \textbf{Inference (ms/tkn.)} & \textbf{VRAM (GB)} \\
|
||||
\hline
|
||||
GPT-2 & 5.6 & 12 & 3 \\
|
||||
GPT-3 & 3,640 & 75 & 350 \\
|
||||
LLaMA-2-7B & 184 & 18 & 14 \\
|
||||
LLaMA-2-13B & 368 & 32 & 26 \\
|
||||
LLaMA-2-70B & 1,720 & 145 & 140 \\
|
||||
Claude 2 & N/A & 82 & $\sim$200 \\
|
||||
GPT-4 & $\sim$25,000 & 210 & $\sim$3,000 \\
|
||||
\hline
|
||||
\end{tabular}
|
||||
\label{tab:model-sizes}
|
||||
\end{center}
|
||||
\vspace{2mm}
|
||||
\begin{minipage}{0.95\linewidth}
|
||||
\footnotesize
|
||||
\textit{Note—} Training compute is measured in petaflop-days. Inference time is reported per token on an A100 GPU. Memory usage denotes peak VRAM during inference. Proprietary model figures are estimates.
|
||||
\end{minipage}
|
||||
\end{table}
|
||||
|
||||
|
||||
|
||||
Moreover, scaling LLMs to address such shortcomings faces practical limitations. Empirical scaling laws~\cite{hoffmann2022trainingcomputeoptimallargelanguage} demonstrate that beyond a certain point, increasing the number of model parameters yields diminishing returns in accuracy relative to computational cost. This is particularly evident in mathematical reasoning benchmarks, where larger models show sub-linear performance improvements despite exponential increases in compute and memory consumption. As Table~\ref{tab:model-sizes} illustrates, state-of-the-art models such as GPT-4 and Claude 2 require up to tens of thousands of petaflop-days of training compute and hundreds of gigabytes to terabytes of memory at inference, yet they still fail to achieve high accuracy on elementary arithmetic problems without external assistance.
|
||||
|
||||
This paper addresses this gap by proposing a fundamentally different approach: embedding deterministic, rule-based tensor mutations directly within the neural network's computational graph. Instead of relying solely on statistical learning, this method introduces explicit, hard-coded mathematical operations into specific locations of the model's embedding space. By leveraging the high parallelism of modern GPUs, particularly tensor core architectures optimized for Single Instruction, Multiple Data (SIMD) workloads, these operations execute with minimal latency and no dependence on external inference pathways.
|
||||
|
||||
@@ -42,14 +42,6 @@ DOI = {10.3390/app14020744}
|
||||
url={https://arxiv.org/abs/2203.15556},
|
||||
}
|
||||
@misc{hendrycks2021measuringmathematicalproblemsolving,
|
||||
|
||||
|
||||
@article{hendrycksmath2021,
|
||||
title={Measuring Mathematical Problem Solving With the MATH Dataset},
|
||||
author={Dan Hendrycks and Collin Burns and Saurav Kadavath and Akul Arora and Steven Basart and Eric Tang and Dawn Song and Jacob Steinhardt},
|
||||
journal={NeurIPS},
|
||||
year={2021}
|
||||
}
|
||||
title={Measuring Mathematical Problem Solving With the MATH Dataset},
|
||||
author={Dan Hendrycks and Collin Burns and Saurav Kadavath and Akul Arora and Steven Basart and Eric Tang and Dawn Song and Jacob Steinhardt},
|
||||
year={2021},
|
||||
@@ -57,7 +49,15 @@ DOI = {10.3390/app14020744}
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.LG},
|
||||
url={https://arxiv.org/abs/2103.03874},
|
||||
}
|
||||
|
||||
@article{hendrycksmath2021,
|
||||
title={Measuring Mathematical Problem Solving With the MATH Dataset},
|
||||
author={Dan Hendrycks and Collin Burns and Saurav Kadavath and Akul Arora and Steven Basart and Eric Tang and Dawn Song and Jacob Steinhardt},
|
||||
journal={NeurIPS},
|
||||
year={2021}
|
||||
}
|
||||
|
||||
@misc{ahn2024largelanguagemodelsmathematical,
|
||||
title={Large Language Models for Mathematical Reasoning: Progresses and Challenges},
|
||||
author={Janice Ahn and Rishu Verma and Renze Lou and Di Liu and Rui Zhang and Wenpeng Yin},
|
||||
|
||||
Reference in New Issue
Block a user