diff --git a/papers/incorrectness24/bibliography.bib b/papers/incorrectness24/bibliography.bib new file mode 100644 index 00000000..abb03800 --- /dev/null +++ b/papers/incorrectness24/bibliography.bib @@ -0,0 +1,94 @@ +@InProceedings{BFJ21:GoalsLuau, + author = {L. Brown and A. Friesen and A. S. A. Jeffrey}, + title = {Position Paper: Goals of the Luau Type System}, + booktitle = {Proc. Human Aspects of Types and Reasoning Assistants}, + year = {2021}, + url = {https://asaj.org/papers/hatra21.pdf}, +} + +@Misc{Roblox, + author = {Roblox}, + title = {Reimagining the way people come together}, + year = 2023, + url = {https://corp.roblox.com}, +} + +@Misc{Luau, + author = {Roblox}, + title = {The {Luau} Programming Language}, + year = 2023, + url = {https://luau-lang.org}, +} + +@Misc{Lua, + author = {Lua.org and {PUC}-Rio}, + title = {The {Lua} Programming Language}, + year = 2023, + url = {https://lua.org}, +} + +@Misc{Jef22:SemanticSubtyping, + author = {A. S. A. Jeffrey}, + title = {Semantic Subtyping in Luau}, + howpublished = {Roblox Technical Blog}, + year = {2022}, + url = {https://blog.roblox.com/2022/11/semantic-subtyping-luau/}, +} + +@InProceedings{GF05:GentleIntroduction, + author = {G. Castagna and A. Frisch}, + title = {A Gentle Introduction to Semantic Subtyping}, + booktitle = {Proc. Principles and Practice of Declarative Programming}, + year = {2005}, +} + +@InProceedings{ST07:GradualTyping, + author = {J. G. Siek and W. Taha}, + title = {Gradual Typing for Objects}, + booktitle = {Proc. European Conf Object-Oriented Programming}, + year = {2007}, + pages = {2-27}, +} + +@Misc{BJ23:agda-typeck, + author = {L. Brown and A. S. A. Jeffrey}, + title = {Luau Prototype Typechecker}, + year = {2023}, + OPTnote = {}, + url = {https://github.com/luau-lang/agda-typeck} +} + +@article{PT00:LocalTypeInference, +author = {Pierce, B. C. and Turner, D. N.}, +title = {Local Type Inference}, +year = {2000}, +volume = {22}, +number = {1}, +journal = {ACM Trans. Program. Lang. Syst.}, +pages = {1–44}, +} + +@inproceedings{SuccessTyping, + author = {Lindahl, Tobias and Sagonas, Konstantinos}, + title = {Practical Type Inference Based on Success Typings}, + year = {2006}, + booktitle = {Proc. Int. Conf. Principles and Practice of Declarative Programming}, + pages = {167–178}, +} + +@InProceedings{Dialyzer, +author="Lindahl, Tobias and Sagonas, Konstantinos", +title="Detecting Software Defects in Telecom Applications Through Lightweight Static Analysis: A War Story", +booktitle="Proc. Asian Symp. Programming Languages and Systems", +year="2004", +pages="91--106", +} + +@Misc{NewNonStrictRFC, + author = {Alan Jeffrey}, + title = {{RFC} For Newv Non-strict Mode}, + howpublished = {Luau Request For Comment}, + year = {2023}, + note = {\url{https://github.com/Roblox/luau/pull/1037}}, +} + diff --git a/papers/incorrectness24/cc-by.png b/papers/incorrectness24/cc-by.png new file mode 100644 index 00000000..c8473a24 Binary files /dev/null and b/papers/incorrectness24/cc-by.png differ diff --git a/papers/incorrectness24/incorrectness24.pdf b/papers/incorrectness24/incorrectness24.pdf new file mode 100644 index 00000000..7b3cec10 Binary files /dev/null and b/papers/incorrectness24/incorrectness24.pdf differ diff --git a/papers/incorrectness24/incorrectness24.tex b/papers/incorrectness24/incorrectness24.tex new file mode 100644 index 00000000..cae73742 --- /dev/null +++ b/papers/incorrectness24/incorrectness24.tex @@ -0,0 +1,316 @@ +\documentclass[sigplan]{acmart} + +\setcopyright{rightsretained} +\copyrightyear{2024} +\acmYear{2024} +\acmConference[Incorrectness '24]{Formal Methods for Incorrectness}{January 2024}{London, UK} +\acmBooktitle{Incorrectness '24: Formal Methods for Incorrectness} +\acmDOI{} +\acmISBN{} +\expandafter\def\csname @copyrightpermission\endcsname{\raisebox{-2ex}{\includegraphics[width=.2\linewidth]{cc-by}} \parbox{.7\linewidth}{\raggedright This work is licensed under a Creative Commons Attribution 4.0 International License.}} +\expandafter\def\csname @copyrightowner\endcsname{Roblox.} +\newcommand{\infer}[2]{\frac{\displaystyle\begin{array}{@{}l@{}}#1\end{array}}{\displaystyle#2}} +\newcommand{\LOCAL}{\mathop{\mathsf{local}}} +\newcommand{\FUNCTION}{\mathop{\mathsf{function}}} +\newcommand{\IF}{\mathop{\mathsf{if}}} +\newcommand{\THEN}{\mathbin{\mathsf{then}}} +\newcommand{\ELSE}{\mathbin{\mathsf{else}}} +\newcommand{\END}{\mathop{\mathsf{end}}} +\newcommand{\NEVER}{\mathsf{never}} +\newcommand{\ERROR}{\mathsf{error}} +\newcommand{\UNKNOWN}{\mathsf{unknown}} +\newcommand{\STRING}{\mathsf{string}} +\newcommand{\NUMBER}{\mathsf{number}} +\newcommand{\MATH}{\mathsf{math}} +\newcommand{\ABS}{\mathsf{abs}} +\newcommand{\LOWER}{\mathsf{lower}} +\newcommand{\fun}{\mathbin{\rightarrow}} + +\begin{document} + +\title{Towards an Unsound But Complete Type System} +\subtitle{Work In Progress on New Non-Strict Mode for Luau} + +\author{Lily Brown} +\author{Andy Friesen} +\author{Alan Jeffrey} +\author{Vighnesh Vijay} +\affiliation{ + \institution{Roblox} + \city{San Mateo} + \state{CA} + \country{USA} +} + +\begin{abstract} +In HATRA 2021, we presented \emph{The Goals Of The Luau Type System}, +describing the human factors of a type system for a language with a +heterogeneous developer community. One of the goals was the design of +type system for bug detection, where we have high confidence that type +errors isolate genuine software defects, and that false positives are +minimized. Such a type system is, by necessity, unsound, but we can ask +instead that it is complete. This paper presents a work-in-progress report +on the design and implementation of the new non-strict mode for Luau. +\end{abstract} + +\maketitle + +\section{Introduction} + +Luau~\cite{Luau} is the scripting language used by the +Roblox~\cite{Roblox} platform for shared immersive experiences. Luau extends +the Lua~\cite{Lua} language, notably by providing type-driven tooling +such as autocomplete and API documentation (as well as traditional type +error reporting). Roblox has hundreds of millions of users, and +millions of creators, ranging from children learning to program for +the first time to professional development studios. + +In HATRA 2021, we presented a position paper on the \emph{Goals Of The Luau Type +System}~\cite{BFJ21:GoalsLuau}, describing the human factors issues +with designing a type system for a language with a heterogeneous +developer community. The design flows from the needs of the different +communities: beginners are focused on immediate goals (``the stairs +should light up when a player walks on them'') and less on the code +quality concerns of more experienced developers; for all users +type-driven tooling is important for productivity. These needs result in a design with two modes: +\begin{itemize} +\item \emph{non-strict mode}, aimed at non-professionals, focused on + minimizing false positives (that is, in non-strict mode, any program with a type error has a defect), and +\item \emph{strict mode}, aimed at professionals, focused on + minimizing false negatives (that is, in strict mode, any program with a defect has a type error). +\end{itemize} +The focus of this extended abstract is the design of non-strict mode: what constitutes +a defect, and how can we design a complete type system for detecting them. +(The words ``sound'' and ``complete'' in this sense arte far from ideal, +but ``sound type system'' has a well-established meaning, and ``complete'' +is well-established as the dual of ``sound'', so here we are.) + +The closest work to ours is success typing~\cite{SuccessTyping}, used +in Erlang Dialyzer~\cite{Dialyzer}. The new feature of our work is +that strict and non-strict mode have to intertact, whereas Dialyzer only has +the equivalent of non-strict mode. + +New non-strict mode is specified in a Luau Request For +Comment~\cite{NewNonStrictRFC}, and is currently being implemented. +We expect it (and other new typechecking features) to be available in +2024. This extended abstract is based on the RFC, but written in +``greek letters and horizontal lines'' rather than ``monspaced text'' +for mat. + +\section{Code defects} + +The main goal of non-strict mode is to identify defects, but this requires +identifying what constitutes a defect,. Run-time errors are an obvious defect: +\begin{verbatim} + local hi = "hi" + print(math.abs(hi)) +\end{verbatim} +but we also want to catch common mistakes such as mis-spelling a property name, +even though Luau returns \verb|nil| for missing property accesses. +For this reason, we consider a larger class of defects: +\begin{itemize} +\item run-time errors, +\item expressions guaranteed to be \verb|nil|, and +\item writing to a table property that is never read. +\end{itemize} + +\subsection{Run-time errors} + +Run-time errors occur due to run-time type mismatches (such as \verb|5("hi")|) +or buit-in functios (such as \verb|math.abs("hi")|). +Detecting run-time errors is undecidable, for example: +\begin{verbatim} + if cond() then + math.abs(“hi”) + end +\end{verbatim} +In general, we cannot be sure that this code produces a run-time +error, but we do know that if \verb|math.abs("hi")| is executed, it +will produce an error, so we consider this to be a defect. + +\subsection{Expressions guaranteed to be nil} + +Luau tables do not error when a missing property is accessed (though embeddings may). So +\begin{verbatim} + local t = { Foo = 5 } + local x = t.Fop +\end{verbatim} +won’t produce a run-time error, but is more likely than not a +programmer error. In this case, if the programmer intent was to +initialize \verb|x| as \verb|nil|, they could have initialized +\verb|x = nil|. For this reason, we consider it a code defect to use +an expression that the type system infers is of type \verb|nil|, other +than the \verb|nil| constant itself. + +\subsection{Writing properties that are never read} + +There is a matching problem with misspelling properties when writing. For example +\begin{verbatim} + function f() + local t = {} + t.Foo = 5 + t.Fop = 7 + print(t.Foo) + end +\end{verbatim} +won’t produce a run-time error, but is more likely than not a +programmer error, since \verb|t.Fop| is written but never read. We can use +read-only and write-only table properties types for this, and make it an +error to create a write-only property. + +We have to be careful about this though, because if \verb|f| ended +with \verb|return t|, then it would be a perfectly sensible function +with type \verb|() -> { Foo: number, Fop: number }|. The only way to detect +that \verb|Fop| was never read would be whole-program analysis, which is +prohibitively expensive. + +\section{New Non-strict error reporting} + +The difficult part of non-strict mode error-reporting is detecting +guaranteed run-time errors. We do this using an error-reporting +pass that generates a type context such that if any of the $x : T$ in +the type context are satisfied, then the program is guaranteed to +produce a type error. + +For example in the program +\begin{verbatim} + function h(x, y) + math.abs(x) + string.lower(y) + end +\end{verbatim} +an error is reported when \verb|x| isn’t a \verb|number|, or \verb|y| isn’t a \verb|string|, so the generated context is +\begin{verbatim} + x : ~number + y : ~string +\end{verbatim} +(\verb|~T| is Luau's concrete syntax for type negation.) +In the function: +\begin{verbatim} + function f(x) + math.abs(x) + string.lower(x) + end +\end{verbatim} +an error is reported when \verb|x| isn’t a \verb|number| or isn’t a \verb|string|, so the constraint set is +\begin{verbatim} + x : ~number | ~string +\end{verbatim} +(\verb"T|U" is Luau's concrete syntax for type negation.) +Since \verb"~number | ~string" is equivalent to the type \verb|unknown| (which contains every value), +non-strict mode can report a warning, since calling the function is guaranteed to throw a run-time error. +In contrast: +\begin{verbatim} + function g(x) + if cond() then + math.abs(x) + else + string.lower(x) + end + end +\end{verbatim} +generates context +\begin{verbatim} + x : ~number & ~string +\end{verbatim} +(\verb|T&U| is Luau's concrete syntax for type intersection.) +Since \verb|~number & ~string| is not equivalent to \verb|unknown|, non-strict mode reports no warning. + +\begin{figure*} + \[\begin{array}{c} + \infer{ + \Gamma \vdash M : \NEVER \dashv \Delta_1 \\ + \Gamma, x : T \vdash B \dashv \Delta_2, x : U \\ + \mbox{(warn if $\UNKNOWN <: U$)} + }{ + \Gamma \vdash (\LOCAL x : T = M; B) \dashv (\Delta_1 \cup \Delta_2) + } + \quad + \infer{ + \Gamma \vdash M : \NEVER \dashv \Delta_1 \\ + \Gamma \vdash B \dashv \Delta_2 \\ + \Gamma \vdash C \dashv \Delta_3 + }{ + \Gamma \vdash (\IF M \THEN B \ELSE C \END) \dashv (\Delta_1 \cup (\Delta_2 \cap \Delta_3)) + } + \\[\bigskipamount] + \infer{}{ + \Gamma \vdash x : T \dashv (x : T) + } + \quad + \infer{ + \mbox{(warn if $k:T$)} + }{ + \Gamma \vdash k : T \dashv \emptyset + } + \quad + \infer{ + \Gamma, x:S \vdash B \dashv \Delta, x:U \\ + \mbox{(warn if $\UNKNOWN <: U$)}\\ + \mbox{(warn if ${\FUNCTION} <: T$)} + }{ + \Gamma \vdash (\FUNCTION (x : S) B \END) : T \dashv \Delta + } + \quad + \infer{ + \Gamma \vdash M : (S \fun \ERROR) \\ + \Gamma \vdash M : \neg{\FUNCTION} \dashv \Delta_1 \\ + \Gamma \vdash N : S \dashv \Delta_2 \\ + \mbox{(warn if $\Gamma \vdash M : (\UNKNOWN \fun T)$)} + }{ + \Gamma \vdash M(N) : T \dashv \Delta_1 \cup \Delta_2 + } + \end{array}\] + \caption{Type context generation for blocks ($\Gamma \vdash B \dashv \Delta$) and expressions ($\Gamma \vdash M:T \dashv \Delta$)} + \label{fig:ctxtgen} +\end{figure*} + +\begin{figure*} + \[ + \infer{ + \infer{}{\Gamma \vdash \MATH.\ABS : (\neg\NUMBER \fun \ERROR)} \quad + \infer{}{\Gamma \vdash \MATH.\ABS : \neg{\FUNCTION} \dashv \emptyset} \quad + \infer{ + \Gamma \vdash \STRING.\LOWER : (\neg\STRING \fun \ERROR) \\ + \Gamma \vdash \STRING.\LOWER : \neg{\FUNCTION} \dashv \emptyset \\ + \Gamma \vdash x : \neg{\STRING} \dashv (x : \neg\STRING) \\ + \mbox{(warn since $\Gamma \vdash \STRING.\LOWER : \UNKNOWN \fun \neg\NUMBER$)} + }{\Gamma \vdash \STRING.\LOWER(x) : \neg{\NUMBER} \dashv (x : \neg\STRING)} + }{\Gamma \vdash (\MATH.\ABS(\STRING.\LOWER(x)) : \NEVER \dashv (x : \neg\STRING)} + \] + \caption{Example warning} + \label{fig:example} +\end{figure*} + + +In Figure~ref{fig:ctxtgen} we provide some of the inference rules for +conbtext generation, and the warnings that context generation produces. +These are run after type inference, so theay can assume that all code is fully typed. + +These rules generaralize type intersection and union to type contexts, +write $\emptyset$ for the everywhere-$\NEVER$ context, and write $x:T$ +for the context that is everywhere $\NEVER$ except for $x$ where it +maps to $T$. + +\begin{conjecture} +If $\Gamma \vdash M : T \dashv \Delta, x:U$ and $\sigma$ is a closing +substitution where $\sigma(x) : U$, and $M[\sigma] \rightarrow^* v$ +then $v : T$. +\end{conjecture} + +\begin{corollary} +If $\Gamma \vdash M : \NEVER \dashv \Delta, x:\UNKNOWN$ and $\sigma$ is a closing +substitution and $M[\sigma]$ does not terminate successfully. +\end{corollary} + +\section{Checked functions} + +TODO + +\section{Future work} + +TODO + +\bibliographystyle{ACM-Reference-Format} \bibliography{bibliography} + +\end{document}