理解 Monads 来自实践
本主题适用于中级到高级 F#开发人员
“什么是 Monads?” 是一个常见的问题。这很容易回答, 但就像 Hitchhikers 指导银河系一样,我们意识到我们不理解答案,因为我们不知道我们要求的是什么。
许多人认为理解 Monads 的方法是练习它们。作为程序员,我们通常不关心 Liskov 的替换原则,子类型或子类的数学基础。通过使用这些想法,我们获得了他们所代表的直觉。Monads 也是如此。
为了帮助你开始使用 Monads,此示例演示了如何构建 Monadic Parser Combinator 库。这可能有助于你入门,但理解将来自编写你自己的 Monadic 库。
足够的散文,代码的时间
解析器类型:
// A Parser<'T> is a function that takes a string and position
// and returns an optionally parsed value and a position
// A parsed value means the position points to the character following the parsed value
// No parsed value indicates a parse failure at the position
type Parser<'T> = Parser of (string*int -> 'T option*int)
使用 Parser 的这个定义,我们定义了一些基本的解析器函数
// Runs a parser 't' on the input string 's'
let run t s =
let (Parser tps) = t
tps (s, 0)
// Different ways to create parser result
let succeedWith v p = Some v, p
let failAt p = None , p
// The 'satisfy' parser succeeds if the character at the current position
// passes the 'sat' function
let satisfy sat : Parser<char> = Parser <| fun (s, p) ->
if p < s.Length && sat s.[p] then succeedWith s.[p] (p + 1)
else failAt p
// 'eos' succeeds if the position is beyond the last character.
// Useful when testing if the parser have consumed the full string
let eos : Parser<unit> = Parser <| fun (s, p) ->
if p < s.Length then failAt p
else succeedWith () p
let anyChar = satisfy (fun _ -> true)
let char ch = satisfy ((=) ch)
let digit = satisfy System.Char.IsDigit
let letter = satisfy System.Char.IsLetter
satisfy
是一个函数,如果我们没有通过 EOS
并且当前位置的字符通过 sat
函数,则给定 sat
函数会生成一个成功的解析器。使用 satisfy
,我们创建了许多有用的字符解析器。
在 FSI 中运行:
> run digit "";;
val it : char option * int = (null, 0)
> run digit "123";;
val it : char option * int = (Some '1', 1)
> run digit "hello";;
val it : char option * int = (null, 0)
我们有一些基本的解析器。我们将使用解析器组合函数将它们组合成更强大的解析器
// 'fail' is a parser that always fails
let fail<'T> = Parser <| fun (s, p) -> failAt p
// 'return_' is a parser that always succeed with value 'v'
let return_ v = Parser <| fun (s, p) -> succeedWith v p
// 'bind' let us combine two parser into a more complex parser
let bind t uf = Parser <| fun (s, p) ->
let (Parser tps) = t
let tov, tp = tps (s, p)
match tov with
| None -> None, tp
| Some tv ->
let u = uf tv
let (Parser ups) = u
ups (s, tp)
名称和签名不是任意选择的, 但我们不会深思熟虑,而是让我们看看我们如何使用 bind
将解析器组合成更复杂的解析器:
> run (bind digit (fun v -> digit)) "123";;
val it : char option * int = (Some '2', 2)
> run (bind digit (fun v -> bind digit (fun u -> return_ (v,u)))) "123";;
val it : (char * char) option * int = (Some ('1', '2'), 2)
> run (bind digit (fun v -> bind digit (fun u -> return_ (v,u)))) "1";;
val it : (char * char) option * int = (null, 1)
这告诉我们的是 bind
允许我们将两个解析器组合成一个更复杂的解析器。由于 bind
的结果是一个解析器,反过来可以再次组合。
> run (bind digit (fun v -> bind digit (fun w -> bind digit (fun u -> return_ (v,w,u))))) "123";;
val it : (char * char * char) option * int = (Some ('1', '2', '3'), 3)
虽然我们将定义辅助函数来简化语法,但 bind
将是我们组合解析器的基本方法。
可以简化语法的一个方面是计算表达式 。它们很容易定义:
type ParserBuilder() =
member x.Bind (t, uf) = bind t uf
member x.Return v = return_ v
member x.ReturnFrom t = t
// 'parser' enables us to combine parsers using 'parser { ... }' syntax
let parser = ParserBuilder()
FSI
let p = parser {
let! v = digit
let! u = digit
return v,u
}
run p "123"
val p : Parser<char * char> = Parser <fun:bind@49-1>
val it : (char * char) option * int = (Some ('1', '2'), 2)
这相当于:
> let p = bind digit (fun v -> bind digit (fun u -> return_ (v,u)))
run p "123";;
val p : Parser<char * char> = Parser <fun:bind@49-1>
val it : (char * char) option * int = (Some ('1', '2'), 2)
我们将要使用的另一个基本的解析器组合器是 orElse
:
// 'orElse' creates a parser that runs parser 't' first, if that is successful
// the result is returned otherwise the result of parser 'u' is returned
let orElse t u = Parser <| fun (s, p) ->
let (Parser tps) = t
let tov, tp = tps (s, p)
match tov with
| None ->
let (Parser ups) = u
ups (s, p)
| Some tv -> succeedWith tv tp
这允许我们像这样定义 letterOrDigit
:
> let letterOrDigit = orElse letter digit;;
val letterOrDigit : Parser<char> = Parser <fun:orElse@70-1>
> run letterOrDigit "123";;
val it : char option * int = (Some '1', 1)
> run letterOrDigit "hello";;
val it : char option * int = (Some 'h', 1)
> run letterOrDigit "!!!";;
val it : char option * int = (null, 0)
关于 Infix 运算符的说明
对 FP 的一个共同关注点是使用不常用的中缀运算符,如 >>=
,>=>
,<-
等。然而,大多数人并不关心使用+
,-
,*
,/
和%
,这些都是众所周知的运算符,用于组成值。然而,FP 中的一个重要部分不仅是组成值,还包括函数。对于中级 FP 开发者来说,中缀运算符 >>=
,>=>
,<-
是众所周知的,应该具有特定的签名和语义。
对于我们到目前为止定义的函数,我们将定义以下用于组合解析器的中缀运算符:
let (>>=) t uf = bind t uf
let (<|>) t u = orElse t u
所以 >>=
意味着 bind
而 <|>
意味着 orElse
。
这允许我们组合解析器更简洁:
let letterOrDigit = letter <|> digit
let p = digit >>= fun v -> digit >>= fun u -> return_ (v,u)
为了定义一些高级解析器组合器,它们将允许我们解析更复杂的表达式,我们定义了一些更简单的解析器组合器:
// 'map' runs parser 't' and maps the result using 'm'
let map m t = t >>= (m >> return_)
let (>>!) t m = map m t
let (>>%) t v = t >>! (fun _ -> v)
// 'opt' takes a parser 't' and creates a parser that always succeed but
// if parser 't' fails the new parser will produce the value 'None'
let opt t = (t >>! Some) <|> (return_ None)
// 'pair' runs parser 't' and 'u' and returns a pair of 't' and 'u' results
let pair t u =
parser {
let! tv = t
let! tu = u
return tv, tu
}
我们已经准备好定义 many
和 sepBy
,它们在应用输入解析器时会更加高级,直到它们失败。然后 many
和 sepBy
返回聚合结果:
// 'many' applies parser 't' until it fails and returns all successful
// parser results as a list
let many t =
let ot = opt t
let rec loop vs = ot >>= function Some v -> loop (v::vs) | None -> return_ (List.rev vs)
loop []
// 'sepBy' applies parser 't' separated by 'sep'.
// The values are reduced with the function 'sep' returns
let sepBy t sep =
let ots = opt (pair sep t)
let rec loop v = ots >>= function Some (s, n) -> loop (s v n) | None -> return_ v
t >>= loop
创建一个简单的表达式解析器
使用我们创建的工具,我们现在可以为像 1+2*3
这样的简单表达式定义解析器
我们从底部开始,为整数 pint
定义一个解析器
// 'pint' parses an integer
let pint =
let f s v = 10*s + int v - int '0'
parser {
let! digits = many digit
return!
match digits with
| [] -> fail
| vs -> return_ (List.fold f 0 vs)
}
我们尝试尽可能多地解析数字,结果是 char list
。如果列表为空,我们 fail
,否则我们将字符折叠成整数。
在 FSI 中测试 pint
:
> run pint "123";;
val it : int option * int = (Some 123, 3)
另外,我们需要解析用于组合整数值的不同类型的运算符:
// operator parsers, note that the parser result is the operator function
let padd = char '+' >>% (+)
let psubtract = char '-' >>% (-)
let pmultiply = char '*' >>% (*)
let pdivide = char '/' >>% (/)
let pmodulus = char '%' >>% (%)
FSI:
> run padd "+";;
val it : (int -> int -> int) option * int = (Some <fun:padd@121-1>, 1)
把它们绑定在一起:
// 'pmullike' parsers integers separated by operators with same precedence as multiply
let pmullike = sepBy pint (pmultiply <|> pdivide <|> pmodulus)
// 'paddlike' parsers sub expressions separated by operators with same precedence as add
let paddlike = sepBy pmullike (padd <|> psubtract)
// 'pexpr' is the full expression
let pexpr =
parser {
let! v = paddlike
let! _ = eos // To make sure the full string is consumed
return v
}
在 FSI 中运行它:
> run pexpr "2+123*2-3";;
val it : int option * int = (Some 245, 9)
结论
通过定义 Parser<'T>
,return_
,bind
并确保它们遵守 monadic 定律, 我们构建了一个简单但强大的 Monadic Parser Combinator 框架。
Monads 和 Parsers 一起使用,因为 Parsers 在解析器状态下执行。Monads 允许我们在隐藏解析器状态的同时组合解析器,从而减少混乱并提高可组合性。
我们创建的框架很慢并且不会产生任何错误消息,这是为了保持代码简洁。 FParsec 提供可接受的性能以及出色的错误消息。
然而,仅凭一个例子无法理解 Monads。一个人必须练习 Monads。
以下是你可以尝试实施的 Monads 的一些示例,以达到你的理解:
- State Monad - 允许隐式环境状态被隐式携带
- Tracer Monad - 允许隐式携带跟踪状态。State Monad 的变种
- Turtle Monad - 用于创建
Turtle(Logos)
程序的 Monad。State Monad 的变种 - 继续 Monad - 一个协程 Monad。这方面的一个例子是 F#中的
async
要学习的最好方法是在你熟悉的域中提出 Monads 的应用程序。对我来说,这是解析器。
完整源代码:
// A Parser<'T> is a function that takes a string and position
// and returns an optionally parsed value and a position
// A parsed value means the position points to the character following the parsed value
// No parsed value indicates a parse failure at the position
type Parser<'T> = Parser of (string*int -> 'T option*int)
// Runs a parser 't' on the input string 's'
let run t s =
let (Parser tps) = t
tps (s, 0)
// Different ways to create parser result
let succeedWith v p = Some v, p
let failAt p = None , p
// The 'satisfy' parser succeeds if the character at the current position
// passes the 'sat' function
let satisfy sat : Parser<char> = Parser <| fun (s, p) ->
if p < s.Length && sat s.[p] then succeedWith s.[p] (p + 1)
else failAt p
// 'eos' succeeds if the position is beyond the last character.
// Useful when testing if the parser have consumed the full string
let eos : Parser<unit> = Parser <| fun (s, p) ->
if p < s.Length then failAt p
else succeedWith () p
let anyChar = satisfy (fun _ -> true)
let char ch = satisfy ((=) ch)
let digit = satisfy System.Char.IsDigit
let letter = satisfy System.Char.IsLetter
// 'fail' is a parser that always fails
let fail<'T> = Parser <| fun (s, p) -> failAt p
// 'return_' is a parser that always succeed with value 'v'
let return_ v = Parser <| fun (s, p) -> succeedWith v p
// 'bind' let us combine two parser into a more complex parser
let bind t uf = Parser <| fun (s, p) ->
let (Parser tps) = t
let tov, tp = tps (s, p)
match tov with
| None -> None, tp
| Some tv ->
let u = uf tv
let (Parser ups) = u
ups (s, tp)
type ParserBuilder() =
member x.Bind (t, uf) = bind t uf
member x.Return v = return_ v
member x.ReturnFrom t = t
// 'parser' enables us to combine parsers using 'parser { ... }' syntax
let parser = ParserBuilder()
// 'orElse' creates a parser that runs parser 't' first, if that is successful
// the result is returned otherwise the result of parser 'u' is returned
let orElse t u = Parser <| fun (s, p) ->
let (Parser tps) = t
let tov, tp = tps (s, p)
match tov with
| None ->
let (Parser ups) = u
ups (s, p)
| Some tv -> succeedWith tv tp
let (>>=) t uf = bind t uf
let (<|>) t u = orElse t u
// 'map' runs parser 't' and maps the result using 'm'
let map m t = t >>= (m >> return_)
let (>>!) t m = map m t
let (>>%) t v = t >>! (fun _ -> v)
// 'opt' takes a parser 't' and creates a parser that always succeed but
// if parser 't' fails the new parser will produce the value 'None'
let opt t = (t >>! Some) <|> (return_ None)
// 'pair' runs parser 't' and 'u' and returns a pair of 't' and 'u' results
let pair t u =
parser {
let! tv = t
let! tu = u
return tv, tu
}
// 'many' applies parser 't' until it fails and returns all successful
// parser results as a list
let many t =
let ot = opt t
let rec loop vs = ot >>= function Some v -> loop (v::vs) | None -> return_ (List.rev vs)
loop []
// 'sepBy' applies parser 't' separated by 'sep'.
// The values are reduced with the function 'sep' returns
let sepBy t sep =
let ots = opt (pair sep t)
let rec loop v = ots >>= function Some (s, n) -> loop (s v n) | None -> return_ v
t >>= loop
// A simplistic integer expression parser
// 'pint' parses an integer
let pint =
let f s v = 10*s + int v - int '0'
parser {
let! digits = many digit
return!
match digits with
| [] -> fail
| vs -> return_ (List.fold f 0 vs)
}
// operator parsers, note that the parser result is the operator function
let padd = char '+' >>% (+)
let psubtract = char '-' >>% (-)
let pmultiply = char '*' >>% (*)
let pdivide = char '/' >>% (/)
let pmodulus = char '%' >>% (%)
// 'pmullike' parsers integers separated by operators with same precedence as multiply
let pmullike = sepBy pint (pmultiply <|> pdivide <|> pmodulus)
// 'paddlike' parsers sub expressions separated by operators with same precedence as add
let paddlike = sepBy pmullike (padd <|> psubtract)
// 'pexpr' is the full expression
let pexpr =
parser {
let! v = paddlike
let! _ = eos // To make sure the full string is consumed
return v
}