% Memory^3 paper (Journal of Machine Learning, 2024). Authors are stored as "Last, First"
% joined by " and "; doi holds the bare identifier (styles add the resolver prefix).
@article{JML-3-300,
  author   = {Yang, Hongkang and Lin, Zehao and Wang, Wenjin and Wu, Hao and Li, Zhiyu and Tang, Bo and Wei, Wenqiang and Wang, Jinbo and Tang, Zeyun and Song, Shichao and Xi, Chenyang and Yu, Yu and Chen, Kai and Xiong, Feiyu and Tang, Linpeng and E, Weinan},
  title    = {{Memory$^3$}: Language Modeling with Explicit Memory},
  journal  = {Journal of Machine Learning},
  year     = {2024},
  volume   = {3},
  number   = {3},
  pages    = {300--346},
  abstract = {The training and inference of large language models (LLMs) are together a costly process that transports knowledge from raw data to meaningful computation. Inspired by the memory hierarchy of the human brain, we reduce this cost by equipping LLMs with explicit memory, a memory format cheaper than model parameters and text retrieval-augmented generation (RAG). Conceptually, with most of its knowledge externalized to explicit memories, the LLM can enjoy a smaller parameter size, training cost, and inference cost, all proportional to the amount of remaining ``abstract knowledge''. As a preliminary proof of concept, we train from scratch a 2.4B LLM, which achieves better performance than much larger LLMs as well as RAG models, and maintains higher decoding speed than RAG. The model is named Memory$^3$, since explicit memory is the third form of memory in LLMs after implicit memory (model parameters) and working memory (context key-values). We introduce a memory circuitry theory to support the externalization of knowledge, and present novel techniques including a memory sparsification mechanism that makes storage tractable and a two-stage pretraining scheme that facilitates memory formation.},
  issn     = {2790-2048},
  doi      = {10.4208/jml.240708},
  url      = {http://global-sci.org/intro/article_detail/jml/23419.html},
}