理解 Monads 来自实践
本主题适用于中级到高级 F#开发人员
“什么是 Monads?” 是一个常见的问题。这很容易回答, 但就像 Hitchhikers 指导银河系一样,我们意识到我们不理解答案,因为我们不知道我们要求的是什么。
许多人认为理解 Monads 的方法是练习它们。作为程序员,我们通常不关心 Liskov 的替换原则,子类型或子类的数学基础。通过使用这些想法,我们获得了他们所代表的直觉。Monads 也是如此。
为了帮助你开始使用 Monads,此示例演示了如何构建 Monadic Parser Combinator 库。这可能有助于你入门,但理解将来自编写你自己的 Monadic 库。
足够的散文,代码的时间
解析器类型:
// A Parser<'T> is a function that takes a string and position
//  and returns an optionally parsed value and a position
//  A parsed value means the position points to the character following the parsed value
//  No parsed value indicates a parse failure at the position
type Parser<'T> = Parser of (string*int -> 'T option*int)
使用 Parser 的这个定义,我们定义了一些基本的解析器函数
// Runs a parser 't' on the input string 's'
let run t s =
  let (Parser tps) = t
  tps (s, 0)
// Different ways to create parser result
let succeedWith v p = Some v, p
let failAt        p = None  , p
// The 'satisfy' parser succeeds if the character at the current position 
//  passes the 'sat' function
let satisfy sat : Parser<char> = Parser <| fun (s, p) ->
  if p < s.Length && sat s.[p] then succeedWith s.[p] (p + 1)
  else failAt p
// 'eos' succeeds if the position is beyond the last character.
//  Useful when testing if the parser have consumed the full string
let eos : Parser<unit> = Parser <| fun (s, p) ->
  if p < s.Length then failAt p
  else succeedWith () p
let anyChar       = satisfy (fun _ -> true)
let char ch       = satisfy ((=) ch)
let digit         = satisfy System.Char.IsDigit
let letter        = satisfy System.Char.IsLetter
satisfy 是一个函数,如果我们没有通过 EOS 并且当前位置的字符通过 sat 函数,则给定 sat 函数会生成一个成功的解析器。使用 satisfy,我们创建了许多有用的字符解析器。
在 FSI 中运行:
> run digit "";;
val it : char option * int = (null, 0)
> run digit "123";;
val it : char option * int = (Some '1', 1)
> run digit "hello";;
val it : char option * int = (null, 0)
我们有一些基本的解析器。我们将使用解析器组合函数将它们组合成更强大的解析器
// 'fail' is a parser that always fails
let fail<'T>      = Parser <| fun (s, p) -> failAt p
// 'return_' is a parser that always succeed with value 'v'
let return_ v     = Parser <| fun (s, p) -> succeedWith v p
// 'bind' let us combine two parser into a more complex parser
let bind t uf     = Parser <| fun (s, p) ->
  let (Parser tps) = t
  let tov, tp = tps (s, p)
  match tov with
  | None    -> None, tp
  | Some tv ->
    let u = uf tv
    let (Parser ups) = u
    ups (s, tp)
名称和签名不是任意选择的, 但我们不会深思熟虑,而是让我们看看我们如何使用 bind 将解析器组合成更复杂的解析器:
> run (bind digit (fun v -> digit)) "123";;
val it : char option * int = (Some '2', 2)
> run (bind digit (fun v -> bind digit (fun u -> return_ (v,u)))) "123";;
val it : (char * char) option * int = (Some ('1', '2'), 2)
> run (bind digit (fun v -> bind digit (fun u -> return_ (v,u)))) "1";;
val it : (char * char) option * int = (null, 1)
这告诉我们的是 bind 允许我们将两个解析器组合成一个更复杂的解析器。由于 bind 的结果是一个解析器,反过来可以再次组合。
> run (bind digit (fun v -> bind digit (fun w -> bind digit (fun u -> return_ (v,w,u))))) "123";;
val it : (char * char * char) option * int = (Some ('1', '2', '3'), 3)
虽然我们将定义辅助函数来简化语法,但 bind 将是我们组合解析器的基本方法。
可以简化语法的一个方面是计算表达式 。它们很容易定义:
type ParserBuilder() =
  member x.Bind       (t, uf) = bind      t   uf
  member x.Return     v       = return_   v
  member x.ReturnFrom t       = t
// 'parser' enables us to combine parsers using 'parser { ... }' syntax
let parser = ParserBuilder()
FSI
let p = parser {
          let! v = digit
          let! u = digit
          return v,u
        }
run p "123"
val p : Parser<char * char> = Parser <fun:bind@49-1>
val it : (char * char) option * int = (Some ('1', '2'), 2)
这相当于:
> let p = bind digit (fun v -> bind digit (fun u -> return_ (v,u)))
run p "123";;
val p : Parser<char * char> = Parser <fun:bind@49-1>
val it : (char * char) option * int = (Some ('1', '2'), 2)
我们将要使用的另一个基本的解析器组合器是 orElse:
// 'orElse' creates a parser that runs parser 't' first, if that is successful
//  the result is returned otherwise the result of parser 'u' is returned
let orElse t u    = Parser <| fun (s, p) ->
  let (Parser tps) = t
  let tov, tp = tps (s, p)
  match tov with
  | None    -> 
    let (Parser ups) = u
    ups (s, p)
  | Some tv -> succeedWith tv tp
这允许我们像这样定义 letterOrDigit:
> let letterOrDigit = orElse letter digit;;
val letterOrDigit : Parser<char> = Parser <fun:orElse@70-1>
> run letterOrDigit "123";;
val it : char option * int = (Some '1', 1)
> run letterOrDigit "hello";;
val it : char option * int = (Some 'h', 1)
> run letterOrDigit "!!!";;
val it : char option * int = (null, 0)
关于 Infix 运算符的说明
对 FP 的一个共同关注点是使用不常用的中缀运算符,如 >>=,>=>,<- 等。然而,大多数人并不关心使用+,-,*,/和%,这些都是众所周知的运算符,用于组成值。然而,FP 中的一个重要部分不仅是组成值,还包括函数。对于中级 FP 开发者来说,中缀运算符 >>=,>=>,<- 是众所周知的,应该具有特定的签名和语义。
对于我们到目前为止定义的函数,我们将定义以下用于组合解析器的中缀运算符:
let (>>=)   t   uf  = bind t uf
let (<|>)   t   u   = orElse t u
所以 >>= 意味着 bind 而 <|> 意味着 orElse。
这允许我们组合解析器更简洁:
let letterOrDigit = letter <|> digit
let p = digit >>= fun v -> digit >>= fun u -> return_ (v,u)
为了定义一些高级解析器组合器,它们将允许我们解析更复杂的表达式,我们定义了一些更简单的解析器组合器:
// 'map' runs parser 't' and maps the result using 'm'
let map m t       = t >>= (m >> return_)
let (>>!) t m     = map m t
let (>>%) t v     = t >>! (fun _ -> v)
// 'opt' takes a parser 't' and creates a parser that always succeed but
//  if parser 't' fails the new parser will produce the value 'None'
let opt t         = (t >>! Some) <|> (return_ None)
// 'pair' runs parser 't' and 'u' and returns a pair of 't' and 'u' results
let pair t u      = 
  parser {
    let! tv = t
    let! tu = u
    return tv, tu
  }
我们已经准备好定义 many 和 sepBy,它们在应用输入解析器时会更加高级,直到它们失败。然后 many 和 sepBy 返回聚合结果:
// 'many' applies parser 't' until it fails and returns all successful
//  parser results as a list
let many t =
  let ot = opt t
  let rec loop vs = ot >>= function Some v -> loop (v::vs) | None -> return_ (List.rev vs)
  loop []
// 'sepBy' applies parser 't' separated by 'sep'. 
//  The values are reduced with the function 'sep' returns
let sepBy t sep     =
  let ots = opt (pair sep t)
  let rec loop v = ots >>= function Some (s, n) -> loop (s v n) | None -> return_ v
  t >>= loop
创建一个简单的表达式解析器
使用我们创建的工具,我们现在可以为像 1+2*3 这样的简单表达式定义解析器
我们从底部开始,为整数 pint 定义一个解析器
// 'pint' parses an integer
let pint = 
  let f s v = 10*s + int v - int '0'
  parser {
    let! digits = many digit
    return! 
      match digits with
      | [] -> fail
      | vs -> return_ (List.fold f 0 vs)
  }
我们尝试尽可能多地解析数字,结果是 char list。如果列表为空,我们 fail,否则我们将字符折叠成整数。
在 FSI 中测试 pint:
> run pint "123";;
val it : int option * int = (Some 123, 3)
另外,我们需要解析用于组合整数值的不同类型的运算符:
// operator parsers, note that the parser result is the operator function 
let padd      = char '+' >>% (+)
let psubtract = char '-' >>% (-)
let pmultiply = char '*' >>% (*)
let pdivide   = char '/' >>% (/)
let pmodulus  = char '%' >>% (%)
FSI:
> run padd "+";;
val it : (int -> int -> int) option * int = (Some <fun:padd@121-1>, 1)
把它们绑定在一起:
// 'pmullike' parsers integers separated by operators with same precedence as multiply
let pmullike  = sepBy pint (pmultiply <|> pdivide <|> pmodulus)
// 'paddlike' parsers sub expressions separated by operators with same precedence as add
let paddlike  = sepBy pmullike (padd <|> psubtract)
// 'pexpr' is the full expression
let pexpr     =
  parser {
    let! v = paddlike
    let! _ = eos      // To make sure the full string is consumed
    return v
  }
在 FSI 中运行它:
> run pexpr "2+123*2-3";;
val it : int option * int = (Some 245, 9)
结论
通过定义 Parser<'T>,return_,bind 并确保它们遵守 monadic 定律, 我们构建了一个简单但强大的 Monadic Parser Combinator 框架。
Monads 和 Parsers 一起使用,因为 Parsers 在解析器状态下执行。Monads 允许我们在隐藏解析器状态的同时组合解析器,从而减少混乱并提高可组合性。
我们创建的框架很慢并且不会产生任何错误消息,这是为了保持代码简洁。 FParsec 提供可接受的性能以及出色的错误消息。
然而,仅凭一个例子无法理解 Monads。一个人必须练习 Monads。
以下是你可以尝试实施的 Monads 的一些示例,以达到你的理解:
- State Monad - 允许隐式环境状态被隐式携带
- Tracer Monad - 允许隐式携带跟踪状态。State Monad 的变种
- Turtle Monad - 用于创建 Turtle(Logos)程序的 Monad。State Monad 的变种
- 继续 Monad - 一个协程 Monad。这方面的一个例子是 F#中的 async
要学习的最好方法是在你熟悉的域中提出 Monads 的应用程序。对我来说,这是解析器。
完整源代码:
// A Parser<'T> is a function that takes a string and position
//  and returns an optionally parsed value and a position
//  A parsed value means the position points to the character following the parsed value
//  No parsed value indicates a parse failure at the position
type Parser<'T> = Parser of (string*int -> 'T option*int)
// Runs a parser 't' on the input string 's'
let run t s =
  let (Parser tps) = t
  tps (s, 0)
// Different ways to create parser result
let succeedWith v p = Some v, p
let failAt        p = None  , p
// The 'satisfy' parser succeeds if the character at the current position 
//  passes the 'sat' function
let satisfy sat : Parser<char> = Parser <| fun (s, p) ->
  if p < s.Length && sat s.[p] then succeedWith s.[p] (p + 1)
  else failAt p
// 'eos' succeeds if the position is beyond the last character.
//  Useful when testing if the parser have consumed the full string
let eos : Parser<unit> = Parser <| fun (s, p) ->
  if p < s.Length then failAt p
  else succeedWith () p
let anyChar       = satisfy (fun _ -> true)
let char ch       = satisfy ((=) ch)
let digit         = satisfy System.Char.IsDigit
let letter        = satisfy System.Char.IsLetter
// 'fail' is a parser that always fails
let fail<'T>      = Parser <| fun (s, p) -> failAt p
// 'return_' is a parser that always succeed with value 'v'
let return_ v     = Parser <| fun (s, p) -> succeedWith v p
// 'bind' let us combine two parser into a more complex parser
let bind t uf     = Parser <| fun (s, p) ->
  let (Parser tps) = t
  let tov, tp = tps (s, p)
  match tov with
  | None    -> None, tp
  | Some tv ->
    let u = uf tv
    let (Parser ups) = u
    ups (s, tp)
type ParserBuilder() =
  member x.Bind       (t, uf) = bind      t   uf
  member x.Return     v       = return_   v
  member x.ReturnFrom t       = t
// 'parser' enables us to combine parsers using 'parser { ... }' syntax
let parser = ParserBuilder()
// 'orElse' creates a parser that runs parser 't' first, if that is successful
//  the result is returned otherwise the result of parser 'u' is returned
let orElse t u    = Parser <| fun (s, p) ->
  let (Parser tps) = t
  let tov, tp = tps (s, p)
  match tov with
  | None    -> 
    let (Parser ups) = u
    ups (s, p)
  | Some tv -> succeedWith tv tp
let (>>=) t uf    = bind t uf
let (<|>) t u     = orElse t u
// 'map' runs parser 't' and maps the result using 'm'
let map m t       = t >>= (m >> return_)
let (>>!) t m     = map m t
let (>>%) t v     = t >>! (fun _ -> v)
// 'opt' takes a parser 't' and creates a parser that always succeed but
//  if parser 't' fails the new parser will produce the value 'None'
let opt t         = (t >>! Some) <|> (return_ None)
// 'pair' runs parser 't' and 'u' and returns a pair of 't' and 'u' results
let pair t u      = 
  parser {
    let! tv = t
    let! tu = u
    return tv, tu
  }
// 'many' applies parser 't' until it fails and returns all successful
//  parser results as a list
let many t =
  let ot = opt t
  let rec loop vs = ot >>= function Some v -> loop (v::vs) | None -> return_ (List.rev vs)
  loop []
// 'sepBy' applies parser 't' separated by 'sep'. 
//  The values are reduced with the function 'sep' returns
let sepBy t sep     =
  let ots = opt (pair sep t)
  let rec loop v = ots >>= function Some (s, n) -> loop (s v n) | None -> return_ v
  t >>= loop
// A simplistic integer expression parser
// 'pint' parses an integer
let pint = 
  let f s v = 10*s + int v - int '0'
  parser {
    let! digits = many digit
    return! 
      match digits with
      | [] -> fail
      | vs -> return_ (List.fold f 0 vs)
  }
// operator parsers, note that the parser result is the operator function 
let padd      = char '+' >>% (+)
let psubtract = char '-' >>% (-)
let pmultiply = char '*' >>% (*)
let pdivide   = char '/' >>% (/)
let pmodulus  = char '%' >>% (%)
// 'pmullike' parsers integers separated by operators with same precedence as multiply
let pmullike  = sepBy pint (pmultiply <|> pdivide <|> pmodulus)
// 'paddlike' parsers sub expressions separated by operators with same precedence as add
let paddlike  = sepBy pmullike (padd <|> psubtract)
// 'pexpr' is the full expression
let pexpr     =
  parser {
    let! v = paddlike
    let! _ = eos      // To make sure the full string is consumed
    return v
  }