almost done
EEMLA.aux
@@ -30,6 +30,7 @@
\abx@aux@segm{0}{0}{deepseekai2025deepseekv3technicalreport}
\abx@aux@cite{0}{cuda_programming_guide_2025}
\abx@aux@segm{0}{0}{cuda_programming_guide_2025}
\@writefile{toc}{\contentsline {section}{\numberline {1}Implementation in Rust Using Burn}{10}{}\protected@file@percent }
\abx@aux@read@bbl@mdfivesum{A4BF38394F3D8B19FB0185ACC0064B31}
\abx@aux@defaultrefcontext{0}{ahn2024largelanguagemodelsmathematical}{nty/global//global/global/global}
\abx@aux@defaultrefcontext{0}{cobbe2021trainingverifierssolvemath}{nty/global//global/global/global}
@@ -41,5 +42,5 @@
\abx@aux@defaultrefcontext{0}{app14020744}{nty/global//global/global/global}
\abx@aux@defaultrefcontext{0}{vaswani2023attentionneed}{nty/global//global/global/global}
\abx@aux@defaultrefcontext{0}{Wang2024}{nty/global//global/global/global}
\xdef \mintedoldcachechecksum{\detokenize{27DB95EA8F56C67A42C668E77E57246D:2}}
\gdef \@abspage@last{10}
\xdef \mintedoldcachechecksum{\detokenize{F97D47BB20F0BD69D0F92961482F0074:10}}
\gdef \@abspage@last{11}
EEMLA.blg
@@ -1,18 +1,18 @@
[0] Config.pm:308> INFO - This is Biber 2.20
[0] Config.pm:311> INFO - Logfile is 'EEMLA.blg'
[39] biber:340> INFO - === Tue Apr 29, 2025, 23:58:53
[39] biber:340> INFO - === Wed Apr 30, 2025, 01:03:12
[47] Biber.pm:420> INFO - Reading 'EEMLA.bcf'
[81] Biber.pm:994> INFO - Found 10 citekeys in bib section 0
[89] Biber.pm:4463> INFO - Processing section 0
[94] Biber.pm:4654> INFO - Looking for bibtex file 'references.bib' for section 0
[94] bibtex.pm:1713> INFO - LaTeX decoding ...
[98] bibtex.pm:1519> INFO - Found BibTeX data source 'references.bib'
[321] UCollate.pm:68> INFO - Overriding locale 'en-US' defaults 'normalization = NFD' with 'normalization = prenormalized'
[321] UCollate.pm:68> INFO - Overriding locale 'en-US' defaults 'variable = shifted' with 'variable = non-ignorable'
[322] Biber.pm:4283> INFO - Sorting list 'nty/global//global/global/global' of type 'entry' with template 'nty' and locale 'en-US'
[322] Biber.pm:4289> INFO - No sort tailoring available for locale 'en-US'
[333] bbl.pm:676> INFO - Writing 'EEMLA.bbl' with encoding 'UTF-8'
[372] bbl.pm:779> INFO - Output to EEMLA.bbl
[372] Biber.pm:131> WARN - legacy month field 'Oct' in entry 'Wang2024' is not an integer - this will probably not sort properly.
[373] Biber.pm:131> WARN - legacy month field 'Feb' in entry 'cuda_programming_guide_2025' is not an integer - this will probably not sort properly.
[373] Biber.pm:133> INFO - WARNINGS: 2
[83] Biber.pm:994> INFO - Found 10 citekeys in bib section 0
[91] Biber.pm:4463> INFO - Processing section 0
[95] Biber.pm:4654> INFO - Looking for bibtex file 'references.bib' for section 0
[96] bibtex.pm:1713> INFO - LaTeX decoding ...
[100] bibtex.pm:1519> INFO - Found BibTeX data source 'references.bib'
[312] UCollate.pm:68> INFO - Overriding locale 'en-US' defaults 'normalization = NFD' with 'normalization = prenormalized'
[313] UCollate.pm:68> INFO - Overriding locale 'en-US' defaults 'variable = shifted' with 'variable = non-ignorable'
[313] Biber.pm:4283> INFO - Sorting list 'nty/global//global/global/global' of type 'entry' with template 'nty' and locale 'en-US'
[313] Biber.pm:4289> INFO - No sort tailoring available for locale 'en-US'
[323] bbl.pm:676> INFO - Writing 'EEMLA.bbl' with encoding 'UTF-8'
[359] bbl.pm:779> INFO - Output to EEMLA.bbl
[359] Biber.pm:131> WARN - legacy month field 'Oct' in entry 'Wang2024' is not an integer - this will probably not sort properly.
[359] Biber.pm:131> WARN - legacy month field 'Feb' in entry 'cuda_programming_guide_2025' is not an integer - this will probably not sort properly.
[359] Biber.pm:133> INFO - WARNINGS: 2
EEMLA.log
@@ -1,4 +1,4 @@
This is pdfTeX, Version 3.141592653-2.6-1.40.27 (TeX Live 2026/dev/Arch Linux) (preloaded format=pdflatex 2025.4.29) 29 APR 2025 23:58
This is pdfTeX, Version 3.141592653-2.6-1.40.27 (TeX Live 2026/dev/Arch Linux) (preloaded format=pdflatex 2025.4.29) 30 APR 2025 01:03
entering extended mode
\write18 enabled.
%&-line parsing enabled.
@@ -803,11 +803,11 @@ Package fancyhdr Warning: \headheight is too small (12.0pt):
\pydata@fh@_EEE4DC88718EF2B9D35BC605B86C9E60.data.minted=\write6
\openout6 = `_EEE4DC88718EF2B9D35BC605B86C9E60.data.minted'.

runsystem(latexminted config --timestamp 20250429235855 EEE4DC88718EF2B9D35BC6
runsystem(latexminted config --timestamp 20250430010315 EEE4DC88718EF2B9D35BC6
05B86C9E60)...executed.

(./_EEE4DC88718EF2B9D35BC605B86C9E60.config.minted)
runsystem(latexminted cleanconfig --timestamp 20250429235855 EEE4DC88718EF2B9D
runsystem(latexminted cleanconfig --timestamp 20250430010315 EEE4DC88718EF2B9D
35BC605B86C9E60)...executed.

@@ -820,11 +820,6 @@ LaTeX Font Info: Trying to load font information for OT1+pcr on input line 3
File: ot1pcr.fd 2001/06/04 font definitions for OT1/pcr.
))

LaTeX Warning: Reference `tb:model-sizes' on page 8 undefined on input line 191
.

Package fancyhdr Warning: \headheight is too small (12.0pt):
(fancyhdr) Make it at least 14.49998pt, for example:
@@ -842,6 +837,32 @@ Package fancyhdr Warning: \headheight is too small (12.0pt):
(fancyhdr) \addtolength{\topmargin}{-2.49998pt}.

[9]
! Misplaced alignment tab character &.
<argument> Implementation Hardware &
Software
l.196 ...extbf{Implementation Hardware & Software}
}
?
(./_minted/F9EE4BCA082EFD60976D581C36A6E9EA.highlight.minted)
(./_minted/B0420CD8DD504542F352049F6401D587.highlight.minted)
(./_minted/E1F5AF9FA573AC6BDDC923342CB6518C.highlight.minted)
(./_minted/DF772CCE72A4F78D4332765469A7A0E9.highlight.minted)
(./_minted/B0420CD8DD504542F352049F6401D587.highlight.minted)
! FancyVerb Error:
Extraneous input ` use burn::module::Module; use burn::tensor::backend::WgpuB
ackend; use log::{info, warn};' between \begin{minted}[<key=value>] and line en
d
.
\FV@Error ... {FancyVerb Error:
\space \space #1
}

l.211 ...kend::WgpuBackend; use log::{info, warn};

?
(./_minted/B189EEB4D1BC1BF5E5BC7A5A7CCC382C.highlight.minted)
(./_minted/9C4740B2321818118B3501A042F1B7E5.highlight.minted)
(./_minted/2B22D1BB0060FE72ED7A036EB8243444.highlight.minted)

Package fancyhdr Warning: \headheight is too small (12.0pt):
@@ -850,41 +871,48 @@ Package fancyhdr Warning: \headheight is too small (12.0pt):
(fancyhdr) You might also make \topmargin smaller:
(fancyhdr) \addtolength{\topmargin}{-2.49998pt}.

[10] (./EEMLA.aux)
[10]

Package fancyhdr Warning: \headheight is too small (12.0pt):
(fancyhdr) Make it at least 14.49998pt, for example:
(fancyhdr) \setlength{\headheight}{14.49998pt}.
(fancyhdr) You might also make \topmargin smaller:
(fancyhdr) \addtolength{\topmargin}{-2.49998pt}.

[11] (./EEMLA.aux)
***********
LaTeX2e <2024-11-01> patch level 2
L3 programming layer <2025-01-18>
***********

LaTeX Warning: There were undefined references.

Package logreq Info: Writing requests to 'EEMLA.run.xml'.
\openout1 = `EEMLA.run.xml'.

)
)
(\end occurred when \ifx on line 211 was incomplete)
Here is how much of TeX's memory you used:
17009 strings out of 475171
359050 string characters out of 5767095
1349252 words of memory out of 5000000
39792 multiletter control sequences out of 15000+600000
572034 words of font info for 66 fonts, out of 8000000 for 9000
17079 strings out of 475171
362138 string characters out of 5767095
1349253 words of memory out of 5000000
39836 multiletter control sequences out of 15000+600000
574682 words of font info for 71 fonts, out of 8000000 for 9000
14 hyphenation exceptions out of 8191
74i,18n,81p,1547b,2647s stack positions out of 10000i,1000n,20000p,200000b,200000s
</usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmbx12.pfb></usr/share/
texmf-dist/fonts/type1/public/amsfonts/cm/cmex10.pfb></usr/share/texmf-dist/fon
ts/type1/public/amsfonts/cm/cmmi12.pfb></usr/share/texmf-dist/fonts/type1/publi
c/amsfonts/cm/cmmi8.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/c
mr12.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmr8.pfb></usr/s
hare/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy10.pfb></usr/share/texmf-dis
t/fonts/type1/public/amsfonts/cm/cmsy8.pfb></usr/share/texmf-dist/fonts/type1/u
rw/courier/ucrr8a.pfb></usr/share/texmf-dist/fonts/type1/urw/times/utmb8a.pfb><
/usr/share/texmf-dist/fonts/type1/urw/times/utmr8a.pfb></usr/share/texmf-dist/f
onts/type1/urw/times/utmri8a.pfb>
Output written on EEMLA.pdf (10 pages, 147544 bytes).
</usr/share/texmf-dist/font
s/type1/public/amsfonts/cm/cmbx12.pfb></usr/share/texmf-dist/fonts/type1/public
/amsfonts/cm/cmex10.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/c
mmi12.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi8.pfb></usr
/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmr12.pfb></usr/share/texmf-di
st/fonts/type1/public/amsfonts/cm/cmr8.pfb></usr/share/texmf-dist/fonts/type1/p
ublic/amsfonts/cm/cmsy10.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts
/cm/cmsy8.pfb></usr/share/texmf-dist/fonts/type1/urw/courier/ucrb8a.pfb></usr/s
hare/texmf-dist/fonts/type1/urw/courier/ucrr8a.pfb></usr/share/texmf-dist/fonts
/type1/urw/times/utmb8a.pfb></usr/share/texmf-dist/fonts/type1/urw/times/utmr8a
.pfb></usr/share/texmf-dist/fonts/type1/urw/times/utmri8a.pfb>
Output written on EEMLA.pdf (11 pages, 163025 bytes).
PDF statistics:
98 PDF objects out of 1000 (max. 8388607)
61 compressed objects within 1 object stream
106 PDF objects out of 1000 (max. 8388607)
66 compressed objects within 1 object stream
0 named destinations out of 1000 (max. 500000)
1 words of extra memory for PDF output out of 10000 (max. 10000000)
EEMLA.tex
@@ -68,7 +68,7 @@ February 28 2025\\
%%%%Title
\begin{center}
\vspace{1em}
Rule-based Tensor Mutations Embedded within LLMs for Low-Cost Mathematical Comptuation
Rule-based Tensor Mutations Embedded within LLMs for Low-Cost Mathematical Computation
\end{center}

@@ -92,9 +92,9 @@ In recent years, a specialized kind of Machine Learning models have hit the mark
These techniques were later commercialized with the advent of GPT-2, GPT-3, and BERT from AI labs like OpenAI and Google's DeepMind \parencite[3]{Wang2024}. With increased supply of Graphical Processing Units (GPUs) and Tensor Processing Units (TPUs), these models began snowballing in scale. This was especially evident starting in 2019 with an iteration of GPT-2 being released with a production size of 1.5 billion parameters. In 2020, GPT-3 scaled up to 175 billion parameters --- achieving true coherence in reasoning for the first time ever for a machine. GPT-4 was released by OpenAI in 2023, with an undisclosed scale in the trillions of parameters. Development investment also climbed into the hundreds of billions of dollars, with new firms such as Anthropic, Grok, etc. Open sourced projects also gained popularity, some backed by multi-billion dollar R\&D teams such as Meta's Llama series.

Functionally, there is no fundamental algorithmic difference between generative and classification models. Indeed, most LLMs are initially trained to generate new sequences of words by setting the loss function to expect the next word in the series of an existing corpus, through a process known as Casual Language Modeling (CLM). For the purposes of commercialization, they have been re-purposed to be prompted as chat-bots by users. This is done by performing backpropagation based on the generation of conversational sequences, with the LLM often instructed to act as if filling out a conversation's transcript.
Functionally, there is no fundamental algorithmic difference between generative and classification models. Indeed, most LLMs are initially trained to generate new sequences of words by setting the loss function to expect the next word in the series of an existing corpus, through a process known as Causal Language Modeling (CLM). For the purposes of commercialization, they have been re-purposed to be prompted as chat-bots by users. This is done by performing back propagation based on the generation of conversational sequences, with the LLM often instructed to act as if filling out a conversation's transcript.

Several underlying technologies are involved in the lifecycle of an LLM. The process of creating one usually starts with the definition of a vocabulary. Sequences of language are broken into tokens by algorithms called tokenizers. Tokenizers split text into smaller units, which are then encoded into a vector by another MLP. This is done to develop a sense of meaning via the mathematical similarity of similar words. The similarity of two vectors can be calculated using the cosine-similarity formula, which calculates the angle $\phi$ between two vectors.
Several underlying technologies are involved in the life cycle of an LLM. The process of creating one usually starts with the definition of a vocabulary. Sequences of language are broken into tokens by algorithms called tokenizers. Tokenizers split text into smaller units, which are then encoded into a vector by another MLP. This is done to develop a sense of meaning via the mathematical similarity of similar words. The similarity of two vectors can be calculated using the cosine-similarity formula, which calculates the angle $\phi$ between two vectors.
\[
\cos\phi=\frac{\vec{A}\cdot\vec{B}}{\lVert\vec{A}\rVert\,\lVert\vec{B}\rVert}
\]
\]
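
For illustration, the cosine-similarity formula above maps directly to a few lines of Rust; this is a minimal sketch with a hypothetical helper name, not code from the committed sources.

\begin{minted}{rust}
// Cosine similarity of two embedding vectors: dot(A, B) / (|A| * |B|).
// Hypothetical helper, for illustration only.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    assert_eq!(a.len(), b.len());
    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    let norm_a = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let norm_b = b.iter().map(|x| x * x).sum::<f32>().sqrt();
    dot / (norm_a * norm_b)
}
\end{minted}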

@@ -145,14 +145,14 @@ This research aims to investigate the potential integration of rule-based tensor
\textbf{RQ:} How can deterministic rule-based tensor mutations be embedded within LLM architectures to enable more accurate and efficient mathematical operations?
\end{quote}

The significance of this line of inquiry lies in its potential to address a fundamental limitation of current generative AI systems like ChatGPT, Anthropic's Claude, etc. While specialized numeric compute systems exist (e.g. RAG with Wolphram Alpha), they operate independently of the SIMD, low-latency systems of LLMS, leading to sizable latency in communication. This is especially prevalent in workflows involving both mathematical and linguistic reasoning. The integration of computational resources required for such workflows within LLMs could substantially reduce the computational resources required for complex tasks that involve both natural and language processing and mathematical reasoning.
The significance of this line of inquiry lies in its potential to address a fundamental limitation of current generative AI systems like ChatGPT, Anthropic's Claude, etc. While specialized numeric compute systems exist (e.g. RAG with Wolfram Alpha), they operate independently of the SIMD, low-latency systems of LLMs, leading to sizable latency in communication. This is especially prevalent in workflows involving both mathematical and linguistic reasoning. The integration of such computational capabilities within LLMs could substantially reduce the resources required for complex tasks that involve both natural language processing and mathematical reasoning.

This infestation focuses specifically on the following mathematical operations:
This investigation focuses specifically on the following mathematical operations, sketched in code after the list:

\begin{itemize}
\item Basic arithmetic (addition, subtraction, multiplication, division)
\item Matrix Operations (multiplication, inversion, determinant)
\item Binary Opertaions (XOR, AND, NAND, left shift, right shift, OR, complement)
\item Binary Operations (XOR, AND, NAND, left shift, right shift, OR, complement)
\item Array Operations (array sum, as well as the mean, median, mode, standard deviation, variance, and other single variable metrics of a data set)
\end{itemize}
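
As a purely illustrative sketch (the enum and its variant names are assumptions, not identifiers from the EEMLA sources), the operation families above could be cataloged as follows.

\begin{minted}{rust}
/// Hypothetical catalog of the rule-based operations listed above.
#[allow(dead_code)]
enum Operation {
    // Basic arithmetic
    Add, Sub, Mul, Div,
    // Matrix operations
    MatMul, Inverse, Determinant,
    // Binary operations
    Xor, And, Nand, Or, Complement, ShiftLeft, ShiftRight,
    // Array operations (single-variable statistics)
    Sum, Mean, Median, Mode, StdDev, Variance,
}
\end{minted}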

@@ -188,10 +188,34 @@ The implementation architecture utilizes predetermined index relationships withi

Each mathematical operation type is assigned specific input and output indices within the tensor, creating a predictable computational graph that can be optimized during compilation using the CUDA compiler, \mintinline{c}|gcc|, and manual assembler optimization like with DeepSeekV3 \parencite[16]{deepseekai2025deepseekv3technicalreport}. Addition operations, for instance, use indices $(i,j)$ and $(i+1,j)$ as inputs, with results stored at $(i,j+d/2)$, effectively partitioning the embedding space into operand and result regions. Multiplication operations utilize indices $(i,j)$ and $(i,j+1)$ as inputs, with results projected to $(i+1,j+d/2)$, maintaining a consistent pattern of spatial relationships within the tensor. More complex operations like matrix determinant calculations employ a $3\times3$ submatrix starting at index $(i,j)$ with results consolidated at $(i+3,j)$. This systematic approach to index mapping enables highly efficient computation on GPU architectures, as the fixed patterns allow for optimized memory access patterns due to hard-coded indexing at compile time, and reduced cache thrashing during tensor operations. Modern GPUs excel at these fixed-pattern operations, particularly when they can be expressed as fused operations within CUDA kernels or optimized through tensor cores designed specifically for matrix multiplication \parencite{cuda_programming_guide_2025}.
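
To make the fixed-index convention concrete, here is a minimal CPU-side sketch in Rust; the function name, the nested-Vec layout, and the toy dimensions are assumptions for illustration, not the CUDA implementation described above.

\begin{minted}{rust}
// Fixed-index addition: operands are read from (i, j) and (i+1, j),
// and the result is written to (i, j + d/2), partitioning each row
// into an operand region and a result region.
fn apply_addition(tensor: &mut [Vec<f32>], i: usize, j: usize) {
    let d = tensor[0].len();
    let sum = tensor[i][j] + tensor[i + 1][j];
    tensor[i][j + d / 2] = sum;
}

fn main() {
    // A 4 x 8 toy "embedding" tensor (d = 8), zero-initialized.
    let mut t = vec![vec![0.0f32; 8]; 4];
    t[0][0] = 2.0;
    t[1][0] = 3.0;
    apply_addition(&mut t, 0, 0);
    assert_eq!(t[0][4], 5.0); // result lands at (0, 0 + 8/2)
}
\end{minted}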

The architecture maintains parallel processing paths that preserve the dual nature of the system's capabilities. The standard language processing path continues to leverage the probabilistic, statistical nature of the transformer architecture, preserving the original LLM capabilities that have proven effective for natural language understanding and generation. Simultaneously, the mathematical computation path applies fixed-index transformations for specific operations, creating a deterministic subsystem within the larger stochastically variant network. These parallel streams capitalize on the inherent parallelism of GPU architectures, allowing different CUDA cores and cache regions to process distinct streams simultaneously. The fixed-index nature of the mathematical operations enables compiler optimizations that can allocate dedicated tensor cores for these operations, maximizing throughput and minimizing latency. Existing models, as shown in Figure \ref{tb:model-sizes} tend to use far more VRAM than cores, leading to an allocation inefficient in terms of performance per millisecond of inference. The paths are later merged through concatenation and a projection layer, a process that similarly benefits from the warp-level primitives available in modern GPU architectures for efficient tensor manipulation.
The architecture maintains parallel processing paths that preserve the dual nature of the system's capabilities. The standard language processing path continues to leverage the probabilistic, statistical nature of the transformer architecture, preserving the original LLM capabilities that have proven effective for natural language understanding and generation. Simultaneously, the mathematical computation path applies fixed-index transformations for specific operations, creating a deterministic subsystem within the larger stochastically variant network. These parallel streams capitalize on the inherent parallelism of GPU architectures, allowing different CUDA cores and cache regions to process distinct streams simultaneously. The fixed-index nature of the mathematical operations enables compiler optimizations that can allocate dedicated tensor cores for these operations, maximizing throughput and minimizing latency. Existing models, as shown in Table \ref{tab:model-sizes}, tend to use far more VRAM than cores, leading to an allocation that is inefficient in terms of performance per millisecond of inference. The paths are later merged through concatenation and a projection layer, a process that similarly benefits from the warp-level primitives available in modern GPU architectures for efficient tensor manipulation.

The attention mechanism serves as a noise filter and integration component, allowing the model to selectively focus on either standard language representations or mathematically transformed representations based on input context. This selective focusing behavior effectively routes information through the appropriate pathway based on the input's semantic requirements. From a hardware acceleration perspective, this mechanism benefits from the recent advancements in GPU architecture specifically designed for transformer models. The attention operations leverage dedicated tensor cores in NVIDIA's Ampere and Hopper architectures, which provide specialized hardware acceleration for matrix multiplication and accumulation operations at various precisions. The fixed-index nature of the approach enables further optimization of these operations through persistent CUDA kernels that maintain tensor data in high-bandwidth on-chip memory (L3-L4 cache), reducing expensive global memory access operations during the attention computation phase.
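
The routing idea can be illustrated with a tiny self-contained sketch: a softmax over two scalar scores blends the language-path and math-path representations. This is an explanatory toy with all names and shapes assumed; it is not the attention implementation itself.

\begin{minted}{rust}
// Softmax over two path scores yields blend weights.
fn softmax2(a: f32, b: f32) -> (f32, f32) {
    let (ea, eb) = (a.exp(), b.exp());
    (ea / (ea + eb), eb / (ea + eb))
}

// Weighted merge of the language-path and math-path vectors.
fn route(lang: &[f32], math: &[f32], s_lang: f32, s_math: f32) -> Vec<f32> {
    let (w_lang, w_math) = softmax2(s_lang, s_math);
    lang.iter().zip(math).map(|(l, m)| w_lang * l + w_math * m).collect()
}

fn main() {
    // A high math score routes the blended output toward the math path.
    let merged = route(&[1.0, 0.0], &[0.0, 1.0], 0.1, 2.0);
    println!("{:?}", merged);
}
\end{minted}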

\newpage
{\raggedright \normalsize \textbf{Implementation Hardware \& Software}}

\section{Implementation in Rust Using Burn}

Rust was selected for its memory safety guarantees, zero-cost abstractions, and deterministic concurrency model. The neural network is implemented using the \mintinline{toml}{burn} crate, a modular, backend-agnostic deep learning framework designed for Rust. Burn enables explicit architectural definition via trait-based modules and supports GPU acceleration using backends such as \mintinline{toml}{burn-wgpu} and \mintinline{toml}{burn-candle}. This design aligns with IB Computer Science principles of modularity, abstraction, and system performance.

\begin{minted}{toml}
[dependencies]
burn = "0.12"
burn-wgpu = "0.12"
log = "0.4"
env_logger = "0.10"
\end{minted}

The system targets an MSI RTX 4090 (24GB VRAM, 900W), utilizing \mintinline{toml}{burn-wgpu} to leverage WebGPU for training on tensor cores. This setup maximizes throughput for floating-point operations critical in gradient descent and backpropagation.

\begin{minted}{rust}
use burn::module::Module;
use burn::tensor::backend::WgpuBackend;
use log::{info, warn};

fn main() {
    env_logger::init();
    info!("Training initialized");
}
\end{minted}

The \mintinline{toml}{log} crate provides structured runtime logging, while \mintinline{toml}{env_logger} parses environment variables to configure log levels. Logging supports traceability, a key aspect of IB standards emphasizing system reliability and maintainability. Modular logging also illustrates core software engineering practices, such as separation of concerns and system observability, during neural network training and mutation processes.
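
In practice, \mintinline{toml}{env_logger} reads its filter from the \mintinline{rust}{RUST_LOG} environment variable; the snippet below is a usage sketch (the fallback level is an assumption, not project configuration) showing a default filter applied when that variable is unset.

\begin{minted}{rust}
use log::info;

fn main() {
    // Honor RUST_LOG if set; otherwise fall back to "info".
    env_logger::Builder::from_env(
        env_logger::Env::default().default_filter_or("info"),
    )
    .init();
    info!("Logger configured");
}
\end{minted}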

%%%%Works cited
\newpage
\begin{center}

@@ -0,0 +1,3 @@
\begin{MintedVerbatim}[commandchars=\\\{\}]
\PYG{n}{env\PYGZus{}logger}
\end{MintedVerbatim}
@@ -0,0 +1,3 @@
\begin{MintedVerbatim}[commandchars=\\\{\}]
\PYG{n}{log}
\end{MintedVerbatim}
@@ -0,0 +1,3 @@
\begin{MintedVerbatim}[commandchars=\\\{\}]
\PYG{n}{burn\PYGZhy{}wgpu}
\end{MintedVerbatim}
@@ -0,0 +1,6 @@
\begin{MintedVerbatim}[commandchars=\\\{\}]

\PYG{k}{fn}\PYG{+w}{ }\PYG{n+nf}{main}\PYG{p}{(}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}}
\PYG{+w}{ }\PYG{n}{env\PYGZus{}logger}\PYG{p}{::}\PYG{n}{init}\PYG{p}{(}\PYG{p}{)}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{info}\PYG{o}{!}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}}\PYG{l+s}{Training initialized}\PYG{l+s}{\PYGZdq{}}\PYG{p}{)}\PYG{p}{;}
\PYG{p}{\PYGZcb{}}
\end{MintedVerbatim}
@@ -0,0 +1,6 @@
\begin{MintedVerbatim}[commandchars=\\\{\}]
\PYG{k}{[}\PYG{k}{dependencies}\PYG{k}{]}
\PYG{n}{burn}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+s+s2}{\PYGZdq{}}\PYG{l+s+s2}{0.12}\PYG{l+s+s2}{\PYGZdq{}}
\PYG{n}{burn\PYGZhy{}wgpu}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+s+s2}{\PYGZdq{}}\PYG{l+s+s2}{0.12}\PYG{l+s+s2}{\PYGZdq{}}\PYG{+w}{ }\PYG{n}{log}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+s+s2}{\PYGZdq{}}\PYG{l+s+s2}{0.4}\PYG{l+s+s2}{\PYGZdq{}}
\PYG{n}{env\PYGZus{}logger}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+s+s2}{\PYGZdq{}}\PYG{l+s+s2}{0.10}\PYG{l+s+s2}{\PYGZdq{}}
\end{MintedVerbatim}
@@ -0,0 +1,3 @@
\begin{MintedVerbatim}[commandchars=\\\{\}]
\PYG{n}{burn\PYGZhy{}candle}
\end{MintedVerbatim}
@@ -0,0 +1,3 @@
\begin{MintedVerbatim}[commandchars=\\\{\}]
\PYG{n}{burn}
\end{MintedVerbatim}
@@ -1,9 +1,16 @@
{
  "jobname": "EEMLA",
  "md5": "EEE4DC88718EF2B9D35BC605B86C9E60",
  "timestamp": "20250429235425",
  "timestamp": "20250430004509",
  "cachefiles": [
    "2B22D1BB0060FE72ED7A036EB8243444.highlight.minted",
    "3CECE1A7C91B2352D5EE067F42BA06E6.highlight.minted",
    "9C4740B2321818118B3501A042F1B7E5.highlight.minted",
    "B0420CD8DD504542F352049F6401D587.highlight.minted",
    "B189EEB4D1BC1BF5E5BC7A5A7CCC382C.highlight.minted",
    "DF772CCE72A4F78D4332765469A7A0E9.highlight.minted",
    "E1F5AF9FA573AC6BDDC923342CB6518C.highlight.minted",
    "F9EE4BCA082EFD60976D581C36A6E9EA.highlight.minted",
    "_EEE4DC88718EF2B9D35BC605B86C9E60.index.minted",
    "default.style.minted"
  ]