2512 words; adds Related Works section

2025-07-06 22:53:56 -05:00
parent 490c663172
commit bc95a4b363
10 changed files with 302 additions and 197 deletions

View File

@@ -4,31 +4,46 @@
\citation{hoffmann2022trainingcomputeoptimallargelanguage}
\citation{wang2024neuralsymbolicoverview}
\@writefile{toc}{\contentsline {section}{\numberline {I}Introduction}{1}{}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {II}Methods}{2}{}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {II-A}}Baseline MLP Feed-Forward Block}{2}{}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {II-B}}Symbolic Mutation of the Second Linear Layer}{2}{}\protected@file@percent }
\@writefile{toc}{\contentsline {paragraph}{\numberline {\mbox {II-B}0a}Masking}{2}{}\protected@file@percent }
\@writefile{toc}{\contentsline {paragraph}{\numberline {\mbox {II-B}0b}Selective Extraction}{2}{}\protected@file@percent }
\@writefile{toc}{\contentsline {paragraph}{\numberline {\mbox {II-B}0c}Linear Encoding}{2}{}\protected@file@percent }
\@writefile{toc}{\contentsline {paragraph}{\numberline {\mbox {II-B}0d}Symbolic Rule Function}{2}{}\protected@file@percent }
\@writefile{toc}{\contentsline {paragraph}{\numberline {\mbox {II-B}0e}Linear Decoding}{2}{}\protected@file@percent }
\@writefile{toc}{\contentsline {paragraph}{\numberline {\mbox {II-B}0f}Normalization}{2}{}\protected@file@percent }
\@writefile{toc}{\contentsline {paragraph}{\numberline {\mbox {II-B}0g}Reintegration}{2}{}\protected@file@percent }
\@writefile{toc}{\contentsline {paragraph}{\numberline {\mbox {II-B}0h}Final Output}{2}{}\protected@file@percent }
\citation{hendrycksmath2021}
\citation{hendrycksmath2021}
\citation{ahn2024largelanguagemodelsmathematical}
\citation{besiroglu2024chinchillascalingreplicationattempt}
\@writefile{toc}{\contentsline {section}{\numberline {II}Related Works}{2}{}\protected@file@percent }
\citation{besold2017neuralsymboliclearningreasoningsurvey}
\citation{gao2023palprogramaidedlanguagemodels}
\citation{xu2024chatglmmathimprovingmathproblemsolving}
\citation{petruzzellis2024assessingemergentsymbolicreasoning}
\@writefile{toc}{\contentsline {section}{\numberline {III}Methods}{3}{}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {III-A}}Baseline MLP Feed-Forward Block}{3}{}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {III-B}}Symbolic Mutation of the Second Linear Layer}{3}{}\protected@file@percent }
\bibstyle{IEEEtran}
\bibdata{references}
\@writefile{toc}{\contentsline {paragraph}{\numberline {\mbox {III-B}0a}Masking}{4}{}\protected@file@percent }
\@writefile{toc}{\contentsline {paragraph}{\numberline {\mbox {III-B}0b}Selective Extraction}{4}{}\protected@file@percent }
\@writefile{toc}{\contentsline {paragraph}{\numberline {\mbox {III-B}0c}Linear Encoding}{4}{}\protected@file@percent }
\@writefile{toc}{\contentsline {paragraph}{\numberline {\mbox {III-B}0d}Symbolic Rule Function}{4}{}\protected@file@percent }
\@writefile{toc}{\contentsline {paragraph}{\numberline {\mbox {III-B}0e}Linear Decoding}{4}{}\protected@file@percent }
\@writefile{toc}{\contentsline {paragraph}{\numberline {\mbox {III-B}0f}Normalization}{4}{}\protected@file@percent }
\@writefile{toc}{\contentsline {paragraph}{\numberline {\mbox {III-B}0g}Reintegration}{4}{}\protected@file@percent }
\@writefile{toc}{\contentsline {paragraph}{\numberline {\mbox {III-B}0h}Final Output}{4}{}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {III-C}}Summary Pipeline}{4}{}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {III-D}}Training Details}{4}{}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {III-E}}Figures and Tables}{4}{}\protected@file@percent }
\newlabel{FAT}{{\mbox {III-E}}{4}{}{subsection.3.5}{}}
\@writefile{toc}{\contentsline {paragraph}{\numberline {\mbox {III-E}0a}Positioning Figures and Tables}{4}{}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {I}{\ignorespaces Table Type Styles}}{4}{}\protected@file@percent }
\newlabel{tab1}{{I}{4}{}{table.1}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Example of a figure caption.}}{4}{}\protected@file@percent }
\newlabel{fig}{{1}{4}{}{figure.1}{}}
\bibcite{hendrycks2021measuringmathematicalproblemsolving}{1}
\bibcite{ahn2024largelanguagemodelsmathematical}{2}
\bibcite{cobbe2021trainingverifierssolvemath}{3}
\bibcite{hoffmann2022trainingcomputeoptimallargelanguage}{4}
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {II-C}}Summary Pipeline}{3}{}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {II-D}}Training Details}{3}{}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {II-E}}Figures and Tables}{3}{}\protected@file@percent }
\newlabel{FAT}{{\mbox {II-E}}{3}{}{subsection.2.5}{}}
\@writefile{toc}{\contentsline {paragraph}{\numberline {\mbox {II-E}0a}Positioning Figures and Tables}{3}{}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {I}{\ignorespaces Table Type Styles}}{3}{}\protected@file@percent }
\newlabel{tab1}{{I}{3}{}{table.1}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Example of a figure caption.}}{3}{}\protected@file@percent }
\newlabel{fig}{{1}{3}{}{figure.1}{}}
\@writefile{toc}{\contentsline {section}{References}{3}{}\protected@file@percent }
\gdef \@abspage@last{3}
\bibcite{hendrycksmath2021}{5}
\bibcite{besiroglu2024chinchillascalingreplicationattempt}{6}
\bibcite{besold2017neuralsymboliclearningreasoningsurvey}{7}
\bibcite{gao2023palprogramaidedlanguagemodels}{8}
\bibcite{xu2024chatglmmathimprovingmathproblemsolving}{9}
\bibcite{petruzzellis2024assessingemergentsymbolicreasoning}{10}
\@writefile{toc}{\contentsline {section}{References}{5}{}\protected@file@percent }
\gdef \@abspage@last{5}

View File

@@ -1,5 +1,5 @@
% Generated by IEEEtran.bst, version: 1.12 (2007/01/11)
\begin{thebibliography}{1}
\begin{thebibliography}{10}
\providecommand{\url}[1]{#1}
\csname url@samestyle\endcsname
\providecommand{\newblock}{\relax}
@@ -7,8 +7,7 @@
\providecommand{\BIBentrySTDinterwordspacing}{\spaceskip=0pt\relax}
\providecommand{\BIBentryALTinterwordstretchfactor}{4}
\providecommand{\BIBentryALTinterwordspacing}{\spaceskip=\fontdimen2\font plus
\BIBentryALTinterwordstretchfactor\fontdimen3\font minus
\fontdimen4\font\relax}
\BIBentryALTinterwordstretchfactor\fontdimen3\font minus \fontdimen4\font\relax}
\providecommand{\BIBforeignlanguage}[2]{{%
\expandafter\ifx\csname l@#1\endcsname\relax
\typeout{** WARNING: IEEEtran.bst: No hyphenation pattern has been}%
@@ -22,35 +21,49 @@
\BIBdecl
\bibitem{hendrycks2021measuringmathematicalproblemsolving}
\BIBentryALTinterwordspacing
D.~Hendrycks, C.~Burns, S.~Kadavath, A.~Arora, S.~Basart, E.~Tang, D.~Song, and
J.~Steinhardt, ``Measuring mathematical problem solving with the math
dataset,'' 2021. [Online]. Available: \url{https://arxiv.org/abs/2103.03874}
\BIBentrySTDinterwordspacing
\bibitem{ahn2024largelanguagemodelsmathematical}
\BIBentryALTinterwordspacing
J.~Ahn, R.~Verma, R.~Lou, D.~Liu, R.~Zhang, and W.~Yin, ``Large language models
for mathematical reasoning: Progresses and challenges,'' 2024. [Online].
Available: \url{https://arxiv.org/abs/2402.00157}
J.~Ahn, R.~Verma, R.~Lou, D.~Liu, R.~Zhang, and W.~Yin, ``Large language models for mathematical reasoning: Progresses and challenges,'' 2024. [Online]. Available: \url{https://arxiv.org/abs/2402.00157}
\BIBentrySTDinterwordspacing
\bibitem{cobbe2021trainingverifierssolvemath}
\BIBentryALTinterwordspacing
K.~Cobbe, V.~Kosaraju, M.~Bavarian, M.~Chen, H.~Jun, L.~Kaiser, M.~Plappert,
J.~Tworek, J.~Hilton, R.~Nakano, C.~Hesse, and J.~Schulman, ``Training
verifiers to solve math word problems,'' 2021. [Online]. Available:
\url{https://arxiv.org/abs/2110.14168}
K.~Cobbe, V.~Kosaraju, M.~Bavarian, M.~Chen, H.~Jun, L.~Kaiser, M.~Plappert, J.~Tworek, J.~Hilton, R.~Nakano, C.~Hesse, and J.~Schulman, ``Training verifiers to solve math word problems,'' 2021. [Online]. Available: \url{https://arxiv.org/abs/2110.14168}
\BIBentrySTDinterwordspacing
\bibitem{hoffmann2022trainingcomputeoptimallargelanguage}
\BIBentryALTinterwordspacing
J.~Hoffmann, S.~Borgeaud, A.~Mensch, E.~Buchatskaya, T.~Cai, E.~Rutherford,
D.~de~Las~Casas, L.~A. Hendricks, J.~Welbl, A.~Clark, T.~Hennigan, E.~Noland,
K.~Millican, G.~van~den Driessche, B.~Damoc, A.~Guy, S.~Osindero,
K.~Simonyan, E.~Elsen, J.~W. Rae, O.~Vinyals, and L.~Sifre, ``Training
compute-optimal large language models,'' 2022. [Online]. Available:
\url{https://arxiv.org/abs/2203.15556}
J.~Hoffmann, S.~Borgeaud, A.~Mensch, E.~Buchatskaya, T.~Cai, E.~Rutherford, D.~de~Las~Casas, L.~A. Hendricks, J.~Welbl, A.~Clark, T.~Hennigan, E.~Noland, K.~Millican, G.~van~den Driessche, B.~Damoc, A.~Guy, S.~Osindero, K.~Simonyan, E.~Elsen, J.~W. Rae, O.~Vinyals, and L.~Sifre, ``Training compute-optimal large language models,'' 2022. [Online]. Available: \url{https://arxiv.org/abs/2203.15556}
\BIBentrySTDinterwordspacing
\bibitem{hendrycksmath2021}
D.~Hendrycks, C.~Burns, S.~Kadavath, A.~Arora, S.~Basart, E.~Tang, D.~Song, and J.~Steinhardt, ``Measuring mathematical problem solving with the math dataset,'' \emph{NeurIPS}, 2021.
\bibitem{besiroglu2024chinchillascalingreplicationattempt}
\BIBentryALTinterwordspacing
T.~Besiroglu, E.~Erdil, M.~Barnett, and J.~You, ``Chinchilla scaling: A replication attempt,'' 2024. [Online]. Available: \url{https://arxiv.org/abs/2404.10102}
\BIBentrySTDinterwordspacing
\bibitem{besold2017neuralsymboliclearningreasoningsurvey}
\BIBentryALTinterwordspacing
T.~R. Besold, A.~d'Avila Garcez, S.~Bader, H.~Bowman, P.~Domingos, P.~Hitzler, K.-U. Kuehnberger, L.~C. Lamb, D.~Lowd, P.~M.~V. Lima, L.~de~Penning, G.~Pinkas, H.~Poon, and G.~Zaverucha, ``Neural-symbolic learning and reasoning: A survey and interpretation,'' 2017. [Online]. Available: \url{https://arxiv.org/abs/1711.03902}
\BIBentrySTDinterwordspacing
\bibitem{gao2023palprogramaidedlanguagemodels}
\BIBentryALTinterwordspacing
L.~Gao, A.~Madaan, S.~Zhou, U.~Alon, P.~Liu, Y.~Yang, J.~Callan, and G.~Neubig, ``Pal: Program-aided language models,'' 2023. [Online]. Available: \url{https://arxiv.org/abs/2211.10435}
\BIBentrySTDinterwordspacing
\bibitem{xu2024chatglmmathimprovingmathproblemsolving}
\BIBentryALTinterwordspacing
Y.~Xu, X.~Liu, X.~Liu, Z.~Hou, Y.~Li, X.~Zhang, Z.~Wang, A.~Zeng, Z.~Du, W.~Zhao, J.~Tang, and Y.~Dong, ``Chatglm-math: Improving math problem-solving in large language models with a self-critique pipeline,'' 2024. [Online]. Available: \url{https://arxiv.org/abs/2404.02893}
\BIBentrySTDinterwordspacing
\bibitem{petruzzellis2024assessingemergentsymbolicreasoning}
\BIBentryALTinterwordspacing
F.~Petruzzellis, A.~Testolin, and A.~Sperduti, ``Assessing the emergent symbolic reasoning abilities of llama large language models,'' 2024. [Online]. Available: \url{https://arxiv.org/abs/2406.06588}
\BIBentrySTDinterwordspacing
\end{thebibliography}

View File

@@ -8,51 +8,60 @@ Reallocated singl_function (elt_size=8) to 100 items from 50.
Reallocated wiz_functions (elt_size=8) to 6000 items from 3000.
Reallocated singl_function (elt_size=8) to 100 items from 50.
Database file #1: references.bib
"{" immediately follows a field name---line 47 of file references.bib
: @article
: {hendrycksmath2021,
I'm skipping whatever remains of this entry
Repeated entry---line 123 of file references.bib
: @misc{ahn2024largelanguagemodelsmathematical
: ,
I'm skipping whatever remains of this entry
Warning--I didn't find a database entry for "wang2024neuralsymbolicoverview"
-- IEEEtran.bst version 1.12 (2007/01/11) by Michael Shell.
-- http://www.michaelshell.org/tex/ieeetran/bibtex/
-- See the "IEEEtran_bst_HOWTO.pdf" manual for usage information.
Warning--all relevant fields are empty in hendrycks2021measuringmathematicalproblemsolving
Done.
You've used 4 entries,
You've used 10 entries,
4024 wiz_defined-function locations,
828 strings with 8673 characters,
and the built_in function-call counts, 3095 in all, are:
= -- 167
> -- 196
851 strings with 10081 characters,
and the built_in function-call counts, 6363 in all, are:
= -- 361
> -- 366
< -- 0
+ -- 96
- -- 48
* -- 171
:= -- 440
add.period$ -- 8
call.type$ -- 4
change.case$ -- 4
+ -- 179
- -- 89
* -- 324
:= -- 918
add.period$ -- 18
call.type$ -- 10
change.case$ -- 9
chr.to.int$ -- 0
cite$ -- 4
duplicate$ -- 212
empty$ -- 233
format.name$ -- 52
if$ -- 670
cite$ -- 11
duplicate$ -- 466
empty$ -- 514
format.name$ -- 98
if$ -- 1393
int.to.chr$ -- 0
int.to.str$ -- 4
missing$ -- 72
newline$ -- 43
num.names$ -- 4
pop$ -- 224
int.to.str$ -- 10
missing$ -- 147
newline$ -- 69
num.names$ -- 9
pop$ -- 459
preamble$ -- 1
purify$ -- 0
quote$ -- 2
skip$ -- 200
skip$ -- 430
stack$ -- 0
substring$ -- 4
swap$ -- 156
substring$ -- 9
swap$ -- 320
text.length$ -- 0
text.prefix$ -- 0
top$ -- 5
type$ -- 4
warning$ -- 0
while$ -- 4
width$ -- 5
write$ -- 62
(There was 1 warning)
type$ -- 10
warning$ -- 1
while$ -- 9
width$ -- 12
write$ -- 114
(There were 2 error messages)

View File

@@ -1,13 +1,13 @@
# Fdb version 4
["bibtex IEEE-conference-template-062824"] 1751679066.05901 "IEEE-conference-template-062824.aux" "IEEE-conference-template-062824.bbl" "IEEE-conference-template-062824" 1751679076.61402 0
["bibtex IEEE-conference-template-062824"] 1751860353.39832 "IEEE-conference-template-062824.aux" "IEEE-conference-template-062824.bbl" "IEEE-conference-template-062824" 1751860353.44239 2
"./IEEEtran.bst" 1168508629 61632 1eded68932b68cdfaf04f54eba82634b ""
"./references.bib" 1745988553.90666 7963 b9939b564a2915e27f259dc4c0940896 ""
"IEEE-conference-template-062824.aux" 1751679076.51633 2925 9f83a4a6e6e02bd668234de14cc17af5 "pdflatex"
"./references.bib" 1751860070.59784 11162 0c4a878cd131480691d184fb1c510664 ""
"IEEE-conference-template-062824.aux" 1751860353.30991 3763 27fe0446fc41500b9d29182ae788a975 "pdflatex"
(generated)
"IEEE-conference-template-062824.bbl"
"IEEE-conference-template-062824.blg"
(rewritten before read)
["pdflatex"] 1751679076.34376 "IEEE-conference-template-062824.tex" "IEEE-conference-template-062824.pdf" "IEEE-conference-template-062824" 1751679076.61415 0
["pdflatex"] 1751860353.133 "IEEE-conference-template-062824.tex" "IEEE-conference-template-062824.pdf" "IEEE-conference-template-062824" 1751860353.44254 0
"/usr/share/texmf-dist/fonts/enc/dvips/base/8r.enc" 1742685315 4850 80dc9bab7f31fb78a000ccfed0e27cab ""
"/usr/share/texmf-dist/fonts/map/fontname/texfonts.map" 1742685315 3524 cb3e574dea2d1052e39280babc910dc8 ""
"/usr/share/texmf-dist/fonts/tfm/adobe/courier/pcrr7t.tfm" 1742685315 960 379cc0019370a9e0208a0a3f949f847a ""
@@ -89,9 +89,9 @@
"/usr/share/texmf-dist/web2c/texmf.cnf" 1742685315 42087 b0f9697c952c8f5a8eede4134282cb0d ""
"/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map" 1745983722 5311557 9da84c45f5b388655502706482323571 ""
"/var/lib/texmf/web2c/pdftex/pdflatex.fmt" 1745983710 2008239 6f62dc7546b9b923a0fd3373b595cfc7 ""
"IEEE-conference-template-062824.aux" 1751679076.51633 2925 9f83a4a6e6e02bd668234de14cc17af5 "pdflatex"
"IEEE-conference-template-062824.bbl" 1751679075.86493 2433 e152102629560a8f4733c4ba49431cf8 "bibtex IEEE-conference-template-062824"
"IEEE-conference-template-062824.tex" 1751679064.97887 13983 74ed50a8bf3cab46915f53fa00f71ab8 ""
"IEEE-conference-template-062824.aux" 1751860353.30991 3763 27fe0446fc41500b9d29182ae788a975 "pdflatex"
"IEEE-conference-template-062824.bbl" 1751860353.44091 4060 b186088d662eca23c8239cd0442e2d5d "bibtex IEEE-conference-template-062824"
"IEEE-conference-template-062824.tex" 1751860352.34514 24155 2d62bc543ce815729d0be3b6a650e1bf ""
"IEEEtran.cls" 1440654524 288304 b67b6fc6c2abb39f9b461923f5199343 ""
"fig1.png" 1719239881 13815 9af23e798dccf51edbe98938c641893e ""
(generated)

View File

@@ -182,6 +182,8 @@ INPUT /usr/share/texmf-dist/fonts/vf/adobe/times/ptmb7t.vf
INPUT /usr/share/texmf-dist/fonts/tfm/adobe/times/ptmb8r.tfm
INPUT /usr/share/texmf-dist/fonts/vf/adobe/times/ptmbi7t.vf
INPUT /usr/share/texmf-dist/fonts/tfm/adobe/times/ptmbi8r.tfm
INPUT /usr/share/texmf-dist/fonts/vf/adobe/times/ptmri7t.vf
INPUT /usr/share/texmf-dist/fonts/tfm/adobe/times/ptmri8r.tfm
INPUT IEEE-conference-template-062824.aux
INPUT /usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmex10.pfb
INPUT /usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi10.pfb

View File

@@ -1,6 +1,7 @@
This is pdfTeX, Version 3.141592653-2.6-1.40.27 (TeX Live 2026/dev/Arch Linux) (preloaded format=pdflatex 2025.4.29) 4 JUL 2025 20:31
This is pdfTeX, Version 3.141592653-2.6-1.40.27 (TeX Live 2026/dev/Arch Linux) (preloaded format=pdflatex 2025.4.29) 6 JUL 2025 22:52
entering extended mode
restricted \write18 enabled.
file:line:error style messages enabled.
%&-line parsing enabled.
**IEEE-conference-template-062824.tex
(./IEEE-conference-template-062824.tex
@@ -17,8 +18,7 @@ Document Class: IEEEtran 2015/08/26 V1.8b by Michael Shell
\@IEEEtrantmpcountB=\count197
\@IEEEtrantmpcountC=\count198
\@IEEEtrantmptoksA=\toks17
LaTeX Font Info: Trying to load font information for OT1+ptm on input line 5
03.
LaTeX Font Info: Trying to load font information for OT1+ptm on input line 503.
(/usr/share/texmf-dist/tex/latex/psnfss/ot1ptm.fd
File: ot1ptm.fd 2001/06/04 font definitions for OT1/ptm.
)
@@ -33,52 +33,42 @@ LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <5> not available
(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <5> not available
(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <7> not available
(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <7> not available
(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <8> not available
(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <8> not available
(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <9> not available
(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <9> not available
(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <10> not available
(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <10> not available
(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <11> not available
(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <11> not available
(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <12> not available
(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <12> not available
(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <17> not available
(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <17> not available
(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <20> not available
(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <20> not available
(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <24> not available
(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <24> not available
(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090.
\IEEEquantizedlength=\dimen148
\IEEEquantizedlengthdiff=\dimen149
\IEEEquantizedtextheightdiff=\dimen150
@@ -119,25 +109,21 @@ LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <24> not available
LaTeX Info: Redefining \cite on input line 302.
LaTeX Info: Redefining \nocite on input line 332.
Package: cite 2015/02/27 v 5.5
)
(/usr/share/texmf-dist/tex/latex/amsmath/amsmath.sty
) (/usr/share/texmf-dist/tex/latex/amsmath/amsmath.sty
Package: amsmath 2024/11/05 v2.17t AMS math features
\@mathmargin=\skip52
For additional information on amsmath, use the `?' option.
(/usr/share/texmf-dist/tex/latex/amsmath/amstext.sty
Package: amstext 2021/08/26 v2.01 AMS text
(/usr/share/texmf-dist/tex/latex/amsmath/amsgen.sty
(/usr/share/texmf-dist/tex/latex/amsmath/amsgen.sty
File: amsgen.sty 1999/11/30 v2.0 generic functions
\@emptytoks=\toks18
\ex@=\dimen165
))
(/usr/share/texmf-dist/tex/latex/amsmath/amsbsy.sty
)) (/usr/share/texmf-dist/tex/latex/amsmath/amsbsy.sty
Package: amsbsy 1999/11/29 v1.2d Bold Symbols
\pmbraise@=\dimen166
)
(/usr/share/texmf-dist/tex/latex/amsmath/amsopn.sty
) (/usr/share/texmf-dist/tex/latex/amsmath/amsopn.sty
Package: amsopn 2022/04/08 v2.04 operator names
)
\inf@bad=\count277
@@ -187,25 +173,20 @@ LaTeX Info: Redefining \Relbar on input line 970.
\mathdisplay@stack=\toks22
LaTeX Info: Redefining \[ on input line 2953.
LaTeX Info: Redefining \] on input line 2954.
)
(/usr/share/texmf-dist/tex/latex/amsfonts/amssymb.sty
) (/usr/share/texmf-dist/tex/latex/amsfonts/amssymb.sty
Package: amssymb 2013/01/14 v3.01 AMS font symbols
(/usr/share/texmf-dist/tex/latex/amsfonts/amsfonts.sty
(/usr/share/texmf-dist/tex/latex/amsfonts/amsfonts.sty
Package: amsfonts 2013/01/14 v3.01 Basic AMSFonts support
\symAMSa=\mathgroup4
\symAMSb=\mathgroup5
LaTeX Font Info: Redeclaring math symbol \hbar on input line 98.
LaTeX Font Info: Overwriting math alphabet `\mathfrak' in version `bold'
(Font) U/euf/m/n --> U/euf/b/n on input line 106.
))
(/usr/share/texmf-dist/tex/latex/algorithms/algorithmic.sty
)) (/usr/share/texmf-dist/tex/latex/algorithms/algorithmic.sty
Package: algorithmic 2009/08/24 v0.1 Document Style `algorithmic'
(/usr/share/texmf-dist/tex/latex/base/ifthen.sty
(/usr/share/texmf-dist/tex/latex/base/ifthen.sty
Package: ifthen 2024/03/16 v1.1e Standard LaTeX ifthen package (DPC)
)
(/usr/share/texmf-dist/tex/latex/graphics/keyval.sty
) (/usr/share/texmf-dist/tex/latex/graphics/keyval.sty
Package: keyval 2022/05/29 v1.15 key=value parser (DPC)
\KV@toks@=\toks23
)
@@ -215,39 +196,30 @@ Package: keyval 2022/05/29 v1.15 key=value parser (DPC)
\c@ALC@depth=\count292
\ALC@tlm=\skip55
\algorithmicindent=\skip56
)
(/usr/share/texmf-dist/tex/latex/graphics/graphicx.sty
) (/usr/share/texmf-dist/tex/latex/graphics/graphicx.sty
Package: graphicx 2021/09/16 v1.2d Enhanced LaTeX Graphics (DPC,SPQR)
(/usr/share/texmf-dist/tex/latex/graphics/graphics.sty
(/usr/share/texmf-dist/tex/latex/graphics/graphics.sty
Package: graphics 2024/08/06 v1.4g Standard LaTeX Graphics (DPC,SPQR)
(/usr/share/texmf-dist/tex/latex/graphics/trig.sty
(/usr/share/texmf-dist/tex/latex/graphics/trig.sty
Package: trig 2023/12/02 v1.11 sin cos tan (DPC)
)
(/usr/share/texmf-dist/tex/latex/graphics-cfg/graphics.cfg
) (/usr/share/texmf-dist/tex/latex/graphics-cfg/graphics.cfg
File: graphics.cfg 2016/06/04 v1.11 sample graphics configuration
)
Package graphics Info: Driver file: pdftex.def on input line 106.
(/usr/share/texmf-dist/tex/latex/graphics-def/pdftex.def
(/usr/share/texmf-dist/tex/latex/graphics-def/pdftex.def
File: pdftex.def 2024/04/13 v1.2c Graphics/color driver for pdftex
))
\Gin@req@height=\dimen174
\Gin@req@width=\dimen175
)
(/usr/share/texmf-dist/tex/latex/base/textcomp.sty
) (/usr/share/texmf-dist/tex/latex/base/textcomp.sty
Package: textcomp 2024/04/24 v2.1b Standard LaTeX package
)
(/usr/share/texmf-dist/tex/latex/xcolor/xcolor.sty
) (/usr/share/texmf-dist/tex/latex/xcolor/xcolor.sty
Package: xcolor 2024/09/29 v3.02 LaTeX color extensions (UK)
(/usr/share/texmf-dist/tex/latex/graphics-cfg/color.cfg
(/usr/share/texmf-dist/tex/latex/graphics-cfg/color.cfg
File: color.cfg 2016/01/02 v1.6 sample color configuration
)
Package xcolor Info: Driver file: pdftex.def on input line 274.
(/usr/share/texmf-dist/tex/latex/graphics/mathcolor.ltx)
(/usr/share/texmf-dist/tex/latex/graphics/mathcolor.ltx)
Package xcolor Info: Model `cmy' substituted by `cmy0' on input line 1349.
Package xcolor Info: Model `hsb' substituted by `rgb' on input line 1353.
Package xcolor Info: Model `RGB' extended on input line 1365.
@@ -257,13 +229,11 @@ Package xcolor Info: Model `tHsb' substituted by `hsb' on input line 1369.
Package xcolor Info: Model `HSB' substituted by `hsb' on input line 1370.
Package xcolor Info: Model `Gray' substituted by `gray' on input line 1371.
Package xcolor Info: Model `wave' substituted by `hsb' on input line 1372.
)
(/usr/share/texmf-dist/tex/latex/l3backend/l3backend-pdftex.def
) (/usr/share/texmf-dist/tex/latex/l3backend/l3backend-pdftex.def
File: l3backend-pdftex.def 2024-05-08 L3 backend support: PDF output (pdfTeX)
\l__color_backend_stack_int=\count293
\l__pdf_internal_box=\box56
)
(./IEEE-conference-template-062824.aux)
) (./IEEE-conference-template-062824.aux)
\openout1 = `IEEE-conference-template-062824.aux'.
LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 16.
@@ -297,66 +267,57 @@ LaTeX Font Info: ... okay on input line 16.
\everyMPtoPDFconversion=\toks25
) (/usr/share/texmf-dist/tex/latex/epstopdf-pkg/epstopdf-base.sty
Package: epstopdf-base 2020-01-24 v2.11 Base part for package epstopdf
Package epstopdf-base Info: Redefining graphics rule for `.eps' on input line 4
85.
(/usr/share/texmf-dist/tex/latex/latexconfig/epstopdf-sys.cfg
File: epstopdf-sys.cfg 2010/07/13 v1.3 Configuration of (r)epstopdf for TeX Liv
e
Package epstopdf-base Info: Redefining graphics rule for `.eps' on input line 485.
(/usr/share/texmf-dist/tex/latex/latexconfig/epstopdf-sys.cfg
File: epstopdf-sys.cfg 2010/07/13 v1.3 Configuration of (r)epstopdf for TeX Live
))
LaTeX Warning: Reference `tab:model-sizes' on page 1 undefined on input line 43
.
LaTeX Warning: Reference `tab:model-sizes' on page 1 undefined on input line 43.
LaTeX Warning: Citation `wang2024neuralsymbolicoverview' on page 1 undefined on
input line 49.
LaTeX Warning: Citation `wang2024neuralsymbolicoverview' on page 1 undefined on input line 49.
[1{/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map}{/usr/share/texmf-dist/fon
ts/enc/dvips/base/8r.enc}
[1{/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map}{/usr/share/texmf-dist/fonts/enc/dvips/base/8r.enc}
]
LaTeX Font Info: Trying to load font information for U+msa on input line 57.
(/usr/share/texmf-dist/tex/latex/amsfonts/umsa.fd
File: umsa.fd 2013/01/14 v3.01 AMS symbols A
)
LaTeX Font Info: Trying to load font information for U+msb on input line 57.
(/usr/share/texmf-dist/tex/latex/amsfonts/umsb.fd
File: umsb.fd 2013/01/14 v3.01 AMS symbols B
)
LaTeX Font Info: Trying to load font information for OT1+pcr on input line 8
5.
(/usr/share/texmf-dist/tex/latex/psnfss/ot1pcr.fd
File: ot1pcr.fd 2001/06/04 font definitions for OT1/pcr.
)
[2]
LaTeX Font Info: Trying to load font information for U+msa on input line 89.
(/usr/share/texmf-dist/tex/latex/amsfonts/umsa.fd
File: umsa.fd 2013/01/14 v3.01 AMS symbols A
)
LaTeX Font Info: Trying to load font information for U+msb on input line 89.
(/usr/share/texmf-dist/tex/latex/amsfonts/umsb.fd
File: umsb.fd 2013/01/14 v3.01 AMS symbols B
)
LaTeX Font Info: Trying to load font information for OT1+pcr on input line 117.
(/usr/share/texmf-dist/tex/latex/psnfss/ot1pcr.fd
File: ot1pcr.fd 2001/06/04 font definitions for OT1/pcr.
)
[3]
<fig1.png, id=24, 249.3315pt x 189.22694pt>
File: fig1.png Graphic file (type png)
<use fig1.png>
Package pdftex.def Info: fig1.png used on input line 246.
Package pdftex.def Info: fig1.png used on input line 278.
(pdftex.def) Requested size: 249.33087pt x 189.22647pt.
LaTeX Font Info: Trying to load font information for OMS+ptm on input line 2
57.
(/usr/share/texmf-dist/tex/latex/psnfss/omsptm.fd
LaTeX Font Info: Trying to load font information for OMS+ptm on input line 289.
(/usr/share/texmf-dist/tex/latex/psnfss/omsptm.fd
File: omsptm.fd
)
LaTeX Font Info: Font shape `OMS/ptm/m/n' in size <10> not available
(Font) Font shape `OMS/cmsy/m/n' tried instead on input line 257.
(Font) Font shape `OMS/cmsy/m/n' tried instead on input line 289.
(./IEEE-conference-template-062824.bbl
(./IEEE-conference-template-062824.bbl
)
[4 <./fig1.png (PNG copy)>])
** Conference Paper **
Before submitting the final camera ready copy, remember to:
@@ -370,7 +331,11 @@ Before submitting the final camera ready copy, remember to:
[3 <./fig1.png (PNG copy)>] (./IEEE-conference-template-062824.aux)
[5
] (./IEEE-conference-template-062824.aux)
***********
LaTeX2e <2024-11-01> patch level 2
L3 programming layer <2025-01-18>
@@ -381,30 +346,18 @@ LaTeX Warning: There were undefined references.
)
Here is how much of TeX's memory you used:
4275 strings out of 475171
66464 string characters out of 5767095
473773 words of memory out of 5000000
27224 multiletter control sequences out of 15000+600000
600708 words of font info for 113 fonts, out of 8000000 for 9000
4282 strings out of 475171
66725 string characters out of 5767095
473793 words of memory out of 5000000
27230 multiletter control sequences out of 15000+600000
601844 words of font info for 114 fonts, out of 8000000 for 9000
14 hyphenation exceptions out of 8191
57i,11n,65p,1559b,304s stack positions out of 10000i,1000n,20000p,200000b,200000s
</usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmex10.pfb></usr/share/
texmf-dist/fonts/type1/public/amsfonts/cm/cmmi10.pfb></usr/share/texmf-dist/fon
ts/type1/public/amsfonts/cm/cmmi5.pfb></usr/share/texmf-dist/fonts/type1/public
/amsfonts/cm/cmmi7.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cm
r10.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmr5.pfb></usr/sh
are/texmf-dist/fonts/type1/public/amsfonts/cm/cmr6.pfb></usr/share/texmf-dist/f
onts/type1/public/amsfonts/cm/cmr7.pfb></usr/share/texmf-dist/fonts/type1/publi
c/amsfonts/cm/cmsy10.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/
cmsy7.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/symbols/msbm10.pfb
></usr/share/texmf-dist/fonts/type1/urw/courier/ucrr8a.pfb></usr/share/texmf-di
st/fonts/type1/urw/times/utmb8a.pfb></usr/share/texmf-dist/fonts/type1/urw/time
s/utmbi8a.pfb></usr/share/texmf-dist/fonts/type1/urw/times/utmr8a.pfb></usr/sha
re/texmf-dist/fonts/type1/urw/times/utmri8a.pfb>
Output written on IEEE-conference-template-062824.pdf (3 pages, 189649 bytes).
</usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmex10.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi10.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi5.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi7.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmr10.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmr5.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmr6.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmr7.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy10.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy7.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/symbols/msbm10.pfb></usr/share/texmf-dist/fonts/type1/urw/courier/ucrr8a.pfb></usr/share/texmf-dist/fonts/type1/urw/times/utmb8a.pfb></usr/share/texmf-dist/fonts/type1/urw/times/utmbi8a.pfb></usr/share/texmf-dist/fonts/type1/urw/times/utmr8a.pfb></usr/share/texmf-dist/fonts/type1/urw/times/utmri8a.pfb>
Output written on IEEE-conference-template-062824.pdf (5 pages, 199542 bytes).
PDF statistics:
96 PDF objects out of 1000 (max. 8388607)
57 compressed objects within 1 object stream
102 PDF objects out of 1000 (max. 8388607)
61 compressed objects within 1 object stream
0 named destinations out of 1000 (max. 500000)
6 words of extra memory for PDF output out of 10000 (max. 10000000)

Binary file not shown.

Binary file not shown.

View File

@@ -21,7 +21,7 @@
\IEEEauthorblockA{\textit{dept. name of organization (of Aff.)} \\
\textit{name of organization (of Aff.)}\\
City, Country \\
email address or ORCID}
krishna@ayyalasomayajula.net}
}
\maketitle
@@ -48,6 +48,38 @@ The proposed system modifies the Llama 3 3B model, an open-weight transformer, t
This work contributes to the broader discourse on integrating symbolic computation into neural architectures. Prior efforts in neural-symbolic computing have explored symbolic regression, logic programming over neural graphs, and reinforcement learning for tool use \cite{wang2024neuralsymbolicoverview}. Unlike these approaches, our method does not require training the model to learn mathematical operations; instead, it injects these operations at runtime within the forward pass of inference. This design minimizes the computational overhead associated with training while maximizing inference-time efficiency.
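One way such runtime injection could be realized, shown here only as a hedged sketch, is to wrap the forward method of a chosen MLP submodule so a deterministic mutation executes inside the inference pass; the module path and the mutate callable are illustrative assumptions, not the implementation used in this work.

import torch

def inject_symbolic_path(mlp: torch.nn.Module, mutate) -> None:
    # Wrap an existing MLP's forward so `mutate` runs inside the forward pass.
    # No weights are changed and no retraining occurs. Illustrative only.
    original = mlp.forward
    def patched(x: torch.Tensor) -> torch.Tensor:
        return mutate(original(x))  # deterministic mutation applied in-pass
    mlp.forward = patched

# Usage against a hypothetical Llama-style module path:
# inject_symbolic_path(model.model.layers[20].mlp, my_rule_fn)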
\section{Related Works}
Mathematical reasoning in artificial intelligence is broadly categorized into two complementary paradigms: \textit{symbolic computation} and \textit{statistical pattern learning}. Symbolic computation refers to the manipulation of mathematical objects using discrete logic, such as arithmetic operations, algebraic simplifications, or equation solving. These processes are deterministic, meaning that given the same inputs, they yield the same outputs independent of statistical variation. In contrast, statistical pattern learning, as embodied by neural networks, involves learning probabilistic relationships between tokens or symbols through exposure to large datasets. While statistical learning captures distributional patterns across language, it does not inherently encode the rules of mathematics that govern the manipulation of numbers and expressions.
Historically, symbolic artificial intelligence systems such as theorem provers, expert systems, and computer algebra systems (e.g., Mathematica, SymPy) have excelled at mathematical reasoning due to their reliance on explicit rule sets and logic engines. These systems require handcrafted rules but offer precise, explainable solutions. Neural networks, including modern large language models, learn representations of symbols as continuous vectors in high-dimensional spaces, enabling them to generate coherent text and recognize syntactic patterns. However, without explicit rules or external reasoning engines, their mathematical capabilities remain fragile and reliant on memorized patterns rather than systematic reasoning. Bridging the gap between these paradigms has become a critical area of research in neural-symbolic computing.
Efforts to improve mathematical competence in language models generally fall into three categories. The first is \textit{data-centric approaches}, where models are fine-tuned on curated datasets containing mathematical problems, equation patterns, and arithmetic exercises. While this improves recall of memorized problem structures, it does not enable novel symbolic manipulation. The second is \textit{tool-augmented inference}, where models are coupled with external symbolic engines like Wolfram Alpha or SymPy at runtime. These tools enable accurate computation but introduce latency, architectural complexity, and reliance on external dependencies. The third is \textit{architectural modification}, where symbolic components are embedded directly into the model's computational graph. This approach aims to enable the model to compute symbolically during inference, preserving end-to-end differentiability and eliminating external dependencies.
Several conventions have emerged in the study of neural mathematical reasoning. Researchers distinguish between \textit{in-context learning} of symbolic patterns (where a model memorizes examples during pretraining), \textit{emergent reasoning} (where generalization arises without explicit training on mathematical tasks), and \textit{symbolic execution}, where operations follow deterministic pathways independent of model weights. Additionally, evaluations often distinguish between \textit{single-step} arithmetic, such as evaluating ``3 + 5,'' and \textit{multi-step} problems, such as solving algebraic expressions or nested equations. Performance on benchmarks like MATH~\cite{hendrycksmath2021} and GSM8K has revealed that while LLMs handle natural language problem descriptions well, they frequently err in the computation stage, demonstrating their probabilistic nature.
Thus, the challenge is not simply a matter of increasing dataset size or model parameters but of rethinking how computation is performed within neural networks. Approaches like program synthesis, intermediate variable reasoning, and explicit mathematical instruction tuning have made progress but remain constrained by the probabilistic nature of neural inference. Embedding deterministic operations directly into the model's inference pathways represents a fundamentally different approach. Instead of predicting the answer token by token, the model can deterministically compute intermediate results within its tensor operations. This paper contributes to this emerging direction by proposing a mechanism for rule-based tensor mutations applied at specific locations within a transformer's multi-layer perceptron (MLP) sub-blocks, enabling precise symbolic computation without external tools or fine-tuning.
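To make the mechanism concrete, the following minimal PyTorch sketch walks the pipeline stages named in the Methods section (Masking, Selective Extraction, Linear Encoding, Symbolic Rule Function, Linear Decoding, Normalization, Reintegration); the shapes, the toy rule, and the layer-norm choice are assumptions for illustration, not the paper's exact operator.

import torch

def symbolic_mutation(h: torch.Tensor, mask: torch.Tensor,
                      enc: torch.Tensor, dec: torch.Tensor) -> torch.Tensor:
    # h: (batch, d) hidden activations; mask: (d,) boolean region selector;
    # enc: (k, r) and dec: (r, k) with k = mask.sum(). Illustrative shapes only.
    with torch.no_grad():                      # deterministic path: no gradient flow
        sub = h[:, mask]                       # Masking + Selective Extraction -> (batch, k)
        z = sub @ enc                          # Linear Encoding into a small rule space
        z = z + z.roll(1, dims=-1)             # Symbolic Rule Function (toy pairwise add)
        new = z @ dec                          # Linear Decoding back to the masked region
        new = torch.nn.functional.layer_norm(new, new.shape[-1:])  # Normalization
    out = h.clone()
    out[:, mask] = new                         # Reintegration into the hidden state
    return out                                 # Final Output flows on through the block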
The gap between probabilistic language modeling and deterministic symbolic reasoning has been a persistent challenge in the development of large language models (LLMs). Hendrycks et al.~\cite{hendrycksmath2021} introduced the MATH dataset, a large-scale benchmark designed to assess symbolic problem-solving abilities in neural networks. Their results indicated that pretrained LLMs—even those fine-tuned on mathematical content—frequently fail to correctly solve algebraic expressions, arithmetic chains, and multi-step symbolic equations. These failures highlight that while LLMs excel at reproducing syntactic patterns observed during training, they do not inherently perform symbolic manipulation, instead relying on probabilistic co-occurrence statistics.
Ahn et al.~\cite{ahn2024largelanguagemodelsmathematical} further explored this discrepancy, identifying key bottlenecks in the way LLMs generalize mathematical concepts. Their survey outlines how token-level models struggle with operator precedence, recursive computations, and intermediate variable handling. They observe that, unlike humans who approach mathematics through compositional reasoning and intermediate abstractions, LLMs tend to memorize shallow patterns from training data. The authors emphasize the need for architectural interventions that can separate symbolic execution from probabilistic context modeling—a gap that this paper's rule-based mutation pathways directly address.
While one tempting solution is to scale models larger, Besiroglu et al.~\cite{besiroglu2024chinchillascalingreplicationattempt} provide evidence that such scaling has diminishing returns. Their attempt to replicate the Chinchilla scaling laws confirms that increases in model size and training data improve overall perplexity but fail to proportionally improve performance on arithmetic tasks. This suggests that arithmetic reasoning is not merely a data-scaling problem but a fundamental architectural shortcoming. Their work motivates alternative solutions beyond brute-force parameter expansion, such as modifying the internal computation pathways of transformer blocks.
The broader neural-symbolic learning community has investigated ways to integrate explicit symbolic reasoning into neural networks. Besold et al.~\cite{besold2017neuralsymboliclearningreasoningsurvey} categorize these approaches into external symbolic reasoning engines and embedded symbolic layers. External engines, such as Prolog interpreters or SMT solvers, provide high reasoning accuracy but introduce significant inference-time latency and disrupt the end-to-end differentiable flow. Embedded symbolic modules attempt to perform symbolic operations within the neural model itself but face challenges aligning symbolic operations with gradient-based optimization. This paper follows the embedded approach, but bypasses gradient concerns by employing fixed rule-based operations during the forward pass, allowing symbolic computation to coexist with trainable layers.
Program-aided models offer another perspective. Gao et al.~\cite{gao2023palprogramaidedlanguagemodels} proposed PAL, where language models generate executable Python code to solve mathematical problems. By offloading arithmetic and logical tasks to external interpreters, PAL improves accuracy on formal reasoning benchmarks. However, this introduces runtime inefficiencies and dependency on non-neural components. Unlike PAL, our work proposes symbolic operations that are computed directly on GPU tensor cores as part of the LLM's forward pass, avoiding context switches and preserving inference latency.
Fine-tuning techniques remain a popular method for improving mathematical accuracy. Xu et al.~\cite{xu2024chatglmmathimprovingmathproblemsolving} introduced ChatGLM-Math, a pipeline where the model critiques its own mathematical outputs and refines them iteratively. While effective, this process requires task-specific fine-tuning, increasing both training and inference costs. Moreover, Petruzzellis et al.~\cite{petruzzellis2024assessingemergentsymbolicreasoning} showed that even when fine-tuned, LLaMA models exhibit inconsistent symbolic reasoning abilities, with success rates highly dependent on input complexity and dataset familiarity. This inconsistency suggests that fine-tuning alone cannot fully bridge the symbolic reasoning gap.
These works converge on a common insight: language models can pattern-match symbolic expressions but lack internal mechanisms for performing symbolic operations themselves. Existing solutions either rely on fine-tuning to statistically approximate symbolic outcomes or delegate computation to external engines. In contrast, this paper proposes embedding deterministic, rule-based tensor mutations directly into the model's internal linear layers. By masking specific tensor regions, applying deterministic arithmetic functions—such as addition, subtraction, multiplication, division, exponentiation, bitwise logic, and shifts—and reintegrating the results within the inference pass, the model gains native support for symbolic computation.
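A dispatch table over exactly that operator set might look like the sketch below; the integer casting for bitwise logic and shifts is our assumption, since floating-point tensors do not support those operators directly.

import torch

RULES = {
    "add": lambda a, b: a + b,
    "sub": lambda a, b: a - b,
    "mul": lambda a, b: a * b,
    "div": lambda a, b: a / b,
    "pow": lambda a, b: torch.pow(a, b),
    "and": lambda a, b: a.to(torch.int64) & b.to(torch.int64),   # bitwise logic
    "or":  lambda a, b: a.to(torch.int64) | b.to(torch.int64),
    "xor": lambda a, b: a.to(torch.int64) ^ b.to(torch.int64),
    "shl": lambda a, b: a.to(torch.int64) << b.to(torch.int64),  # shifts
    "shr": lambda a, b: a.to(torch.int64) >> b.to(torch.int64),
}

def apply_rule(name: str, a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
    with torch.no_grad():                # rules run outside the gradient graph
        return RULES[name](a, b).to(a.dtype)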
Critically, this approach does not replace the probabilistic language modeling capabilities of the transformer but augments them with deterministic pathways optimized for mathematical reasoning. Symbolic operations are performed without gradient flow, ensuring that the core model remains a probabilistic language generator while gaining deterministic subroutines where needed. This architecture represents a middle ground between pure neural-symbolic systems and hybrid models with external engines, achieving both architectural elegance and computational efficiency.
\section{Methods}
\subsection{Baseline MLP Feed-Forward Block}
@@ -91,7 +123,7 @@ where \(w1\) and \(w2\) are linear layers and \texttt{relu} is the chosen activa
Graphically, the data flow is:
\[
x \rightarrow \text{Linear}(W_1) \rightarrow f(\cdot) \rightarrow \text{Linear}(W_2) \rightarrow \text{Output}.
x \rightarrow \text{Linear}(W_1) \rightarrow f(\circ) \rightarrow \text{Linear}(W_2) \rightarrow \text{Output}.
\]
This architecture applies sequential transformations, where each layer processes the output of the previous layer.
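For reference, the baseline block described by this data flow corresponds to a feed-forward module of roughly the following shape; the dimensions below are placeholders sized for a Llama-3-3B-scale model, not values taken from the paper.

import torch
import torch.nn as nn

class BaselineMLP(nn.Module):
    # Minimal sketch of the baseline feed-forward block:
    # x -> Linear(W1) -> relu -> Linear(W2) -> output.
    def __init__(self, d_model: int = 3072, d_hidden: int = 8192):
        super().__init__()
        self.w1 = nn.Linear(d_model, d_hidden)
        self.w2 = nn.Linear(d_hidden, d_model)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.w2(torch.relu(self.w1(x)))   # f(.) is the chosen activation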

View File

@@ -42,7 +42,15 @@ DOI = {10.3390/app14020744}
url={https://arxiv.org/abs/2203.15556},
}
@misc{hendrycks2021measuringmathematicalproblemsolving,
title={Measuring Mathematical Problem Solving With the MATH Dataset},
@article{hendrycksmath2021,
title={Measuring Mathematical Problem Solving With the MATH Dataset},
author={Dan Hendrycks and Collin Burns and Saurav Kadavath and Akul Arora and Steven Basart and Eric Tang and Dawn Song and Jacob Steinhardt},
journal={NeurIPS},
year={2021}
}
title={Measuring Mathematical Problem Solving With the MATH Dataset},
author={Dan Hendrycks and Collin Burns and Saurav Kadavath and Akul Arora and Steven Basart and Eric Tang and Dawn Song and Jacob Steinhardt},
year={2021},
eprint={2103.03874},
@@ -65,7 +73,7 @@ DOI = {10.3390/app14020744}
year={2021},
eprint={2110.14168},
archivePrefix={arXiv},
primaryClass={cs.LG},
primaryClass={cs.LG},
url={https://arxiv.org/abs/2110.14168},
}
@misc{cuda_programming_guide_2025,
@@ -85,3 +93,76 @@ DOI = {10.3390/app14020744}
primaryClass={cs.CL},
url={https://arxiv.org/abs/2412.19437},
}
@article{hendrycksmath2021,
title={Measuring Mathematical Problem Solving With the MATH Dataset},
author={Dan Hendrycks and Collin Burns and Saurav Kadavath and Akul Arora and Steven Basart and Eric Tang and Dawn Song and Jacob Steinhardt},
journal={NeurIPS},
year={2021}
}
@misc{besiroglu2024chinchillascalingreplicationattempt,
title={Chinchilla Scaling: A replication attempt},
author={Tamay Besiroglu and Ege Erdil and Matthew Barnett and Josh You},
year={2024},
eprint={2404.10102},
archivePrefix={arXiv},
primaryClass={cs.AI},
url={https://arxiv.org/abs/2404.10102},
}
@misc{ahn2024largelanguagemodelsmathematical,
title={Large Language Models for Mathematical Reasoning: Progresses and Challenges},
author={Janice Ahn and Rishu Verma and Renze Lou and Di Liu and Rui Zhang and Wenpeng Yin},
year={2024},
eprint={2402.00157},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2402.00157},
}
@misc{besold2017neuralsymboliclearningreasoningsurvey,
title={Neural-Symbolic Learning and Reasoning: A Survey and Interpretation},
author={Tarek R. Besold and Artur d'Avila Garcez and Sebastian Bader and Howard Bowman and Pedro Domingos and Pascal Hitzler and Kai-Uwe Kuehnberger and Luis C. Lamb and Daniel Lowd and Priscila Machado Vieira Lima and Leo de Penning and Gadi Pinkas and Hoifung Poon and Gerson Zaverucha},
year={2017},
eprint={1711.03902},
archivePrefix={arXiv},
primaryClass={cs.AI},
url={https://arxiv.org/abs/1711.03902},
}
@misc{petruzzellis2024assessingemergentsymbolicreasoning,
title={Assessing the Emergent Symbolic Reasoning Abilities of Llama Large Language Models},
author={Flavio Petruzzellis and Alberto Testolin and Alessandro Sperduti},
year={2024},
eprint={2406.06588},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2406.06588},
}
@misc{xu2024chatglmmathimprovingmathproblemsolving,
title={ChatGLM-Math: Improving Math Problem-Solving in Large Language Models with a Self-Critique Pipeline},
author={Yifan Xu and Xiao Liu and Xinghan Liu and Zhenyu Hou and Yueyan Li and Xiaohan Zhang and Zihan Wang and Aohan Zeng and Zhengxiao Du and Wenyi Zhao and Jie Tang and Yuxiao Dong},
year={2024},
eprint={2404.02893},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2404.02893},
}
@misc{gao2023palprogramaidedlanguagemodels,
title={PAL: Program-aided Language Models},
author={Luyu Gao and Aman Madaan and Shuyan Zhou and Uri Alon and Pengfei Liu and Yiming Yang and Jamie Callan and Graham Neubig},
year={2023},
eprint={2211.10435},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2211.10435},
}