理解 Monads 來自實踐
本主題適用於中級到高階 F#開發人員
“什麼是 Monads?” 是一個常見的問題。這個問題很容易回答,但就像《銀河便車指南》(The Hitchhiker's Guide to the Galaxy)裡那樣,我們會發現自己並不理解答案,因為我們根本不清楚自己問的是什麼。
許多人認為理解 Monads 的方法是練習它們。作為程式設計師,我們通常不關心 Liskov 的替換原則,子型別或子類的數學基礎。通過使用這些想法,我們獲得了他們所代表的直覺。Monads 也是如此。
為了幫助你開始使用 Monads,此示例演示了如何構建 Monadic Parser Combinator 庫。這可能有助於你入門,但真正的理解將來自編寫你自己的 Monadic 庫。
說得夠多了,來看程式碼吧
解析器型別:
// A Parser<'T> wraps a function that takes the input string and a position
// and returns an optionally parsed value together with a position.
// 'Some value' means the parse succeeded and the returned position points to the
// character following the parsed value.
// 'None' indicates a parse failure at the returned position.
type Parser<'T> = Parser of (string*int -> 'T option*int)
使用 Parser 的這個定義,我們定義了一些基本的解析器函式
// Runs the parser 't' on the input string 's', starting at position 0.
// Returns the parser's raw result: (optional value, position).
let run t s =
    match t with
    | Parser parse -> parse (s, 0)
// Constructors for parser results; a result is the pair (optional value, position).
// 'succeedWith' wraps the value 'v' in 'Some' and pairs it with position 'p'.
let succeedWith v p = (Some v, p)
// 'failAt' pairs 'None' with the position 'p' where the failure happened.
let failAt p = (None, p)
// 'satisfy' builds a parser that inspects the character at the current position.
// It succeeds with that character (advancing the position by one) when 'sat'
// accepts it; it fails without advancing at end of input or when 'sat' rejects it.
let satisfy sat : Parser<char> =
    Parser (fun (s, p) ->
        if p >= s.Length then failAt p
        else
            let c = s.[p]
            if sat c then succeedWith c (p + 1) else failAt p)
// 'eos' (end of string) succeeds with unit when the position is at or beyond
// the end of the input, and fails otherwise. It never advances the position.
// Useful when testing whether a parser has consumed the full string.
let eos : Parser<unit> =
    Parser (fun (s, p) ->
        if p >= s.Length then succeedWith () p
        else failAt p)
// Single-character parsers built on top of 'satisfy'
// 'anyChar' accepts whatever character is at the current position
let anyChar = satisfy (fun _ -> true)
// 'char ch' accepts only the character 'ch'
let char ch = satisfy (fun c -> c = ch)
// 'digit' accepts characters for which System.Char.IsDigit is true
let digit = satisfy (fun c -> System.Char.IsDigit c)
// 'letter' accepts characters for which System.Char.IsLetter is true
let letter = satisfy (fun c -> System.Char.IsLetter c)
satisfy 是一個函式:給定一個 sat 函式,它會產生一個解析器;若尚未到達 EOS(輸入結尾),且當前位置的字元通過 sat 函式,該解析器即成功。藉助 satisfy,我們建立了許多有用的字元解析器。
在 FSI 中執行:
> run digit "";;
val it : char option * int = (null, 0)
> run digit "123";;
val it : char option * int = (Some '1', 1)
> run digit "hello";;
val it : char option * int = (null, 0)
我們有一些基本的解析器。我們將使用解析器組合函式將它們組合成更強大的解析器
// 'fail' is a parser that always fails at the current position
let fail<'T> : Parser<'T> = Parser (fun (_, p) -> failAt p)
// 'return_' is a parser that always succeeds with the value 'v'
// without advancing the position
let return_ v = Parser (fun (_, p) -> succeedWith v p)
// 'bind' lets us combine two parsers into a more complex parser.
// It runs 't'; if 't' fails, that failure (and its position) is the result.
// Otherwise the value produced by 't' is passed to 'uf' to obtain the next
// parser, which is then run from the position where 't' stopped.
let bind t uf =
    Parser (fun (s, p) ->
        let (Parser first) = t
        match first (s, p) with
        | None, failPos -> None, failPos
        | Some tv, nextPos ->
            let (Parser second) = uf tv
            second (s, nextPos))
這些名稱和簽名並不是隨意選擇的,但我們不會在此深入探討;先來看看如何使用 bind 將解析器組合成更複雜的解析器:
> run (bind digit (fun v -> digit)) "123";;
val it : char option * int = (Some '2', 2)
> run (bind digit (fun v -> bind digit (fun u -> return_ (v,u)))) "123";;
val it : (char * char) option * int = (Some ('1', '2'), 2)
> run (bind digit (fun v -> bind digit (fun u -> return_ (v,u)))) "1";;
val it : (char * char) option * int = (null, 1)
這告訴我們的是 bind
允許我們將兩個解析器組合成一個更復雜的解析器。由於 bind
的結果是一個解析器,反過來可以再次組合。
> run (bind digit (fun v -> bind digit (fun w -> bind digit (fun u -> return_ (v,w,u))))) "123";;
val it : (char * char * char) option * int = (Some ('1', '2', '3'), 3)
雖然我們將定義輔助函式來簡化語法,但 bind
將是我們組合解析器的基本方法。
可以簡化語法的一個方面是計算表示式 。它們很容易定義:
// Computation expression builder for parsers:
//   'let!'    maps to 'bind'
//   'return'  maps to 'return_'
//   'return!' yields the given parser unchanged
type ParserBuilder() =
    member __.Bind (t, uf) = bind t uf
    member __.Return v = return_ v
    member __.ReturnFrom t = t
// 'parser' enables us to combine parsers using the 'parser { ... }' syntax
let parser = ParserBuilder()
FSI
let p = parser {
let! v = digit
let! u = digit
return v,u
}
run p "123"
val p : Parser<char * char> = Parser <fun:bind@49-1>
val it : (char * char) option * int = (Some ('1', '2'), 2)
這相當於:
> let p = bind digit (fun v -> bind digit (fun u -> return_ (v,u)))
run p "123";;
val p : Parser<char * char> = Parser <fun:bind@49-1>
val it : (char * char) option * int = (Some ('1', '2'), 2)
我們將要使用的另一個基本的解析器組合器是 orElse
:
// 'orElse' creates a parser that tries 't' first. If 't' succeeds its result is
// returned; otherwise 'u' is run from the original position (any progress made
// by the failed 't' is discarded) and the result of 'u' is returned.
let orElse t u =
    Parser (fun (s, p) ->
        let (Parser first) = t
        match first (s, p) with
        | Some tv, tp -> succeedWith tv tp
        | None, _ ->
            let (Parser alternative) = u
            alternative (s, p))
這允許我們像這樣定義 letterOrDigit
:
> let letterOrDigit = orElse letter digit;;
val letterOrDigit : Parser<char> = Parser <fun:orElse@70-1>
> run letterOrDigit "123";;
val it : char option * int = (Some '1', 1)
> run letterOrDigit "hello";;
val it : char option * int = (Some 'h', 1)
> run letterOrDigit "!!!";;
val it : char option * int = (null, 0)
關於 Infix 運算子的說明
人們對函數式程式設計(FP)常見的一個顧慮,是它使用了 >>=、>=>、<- 等不尋常的中綴運算子。然而,大多數人並不介意使用 +、-、*、/ 和 %,這些都是眾所周知、用於組合值的運算子。而 FP 的一大重點不僅在於組合值,也在於組合函式。對於中級 FP 開發者來說,中綴運算子 >>=、>=>、<- 同樣眾所周知,並且應該具有特定的簽名和語義。
對於我們到目前為止定義的函式,我們將定義以下用於組合解析器的中綴運算子:
// '>>=' is the infix form of 'bind': run 't', then the parser produced by 'uf'
let (>>=) t uf = uf |> bind t
// '<|>' is the infix form of 'orElse': try 't', fall back to 'u'
let (<|>) t u = u |> orElse t
所以 >>=
意味著 bind
而 <|>
意味著 orElse
。
這允許我們組合解析器更簡潔:
// 'letterOrDigit' tries 'letter' first and falls back to 'digit'
let letterOrDigit = letter <|> digit
// 'p' parses two digits in sequence and returns them as a pair
let p = digit >>= fun v -> digit >>= fun u -> return_ (v,u)
為了定義一些高階解析器組合器,它們將允許我們解析更復雜的表示式,我們定義了一些更簡單的解析器組合器:
// 'map' runs parser 't' and, on success, transforms its value with 'm'
let map m t = bind t (fun v -> return_ (m v))
// '>>!' is 'map' with the parser on the left-hand side
let (>>!) t m = map m t
// '>>%' runs 't' and, on success, replaces its value with the constant 'v'
let (>>%) t v = map (fun _ -> v) t
// 'opt' turns parser 't' into a parser that always succeeds: it yields
// 'Some value' when 't' succeeds, and 'None' (without advancing) when 't' fails
let opt t = orElse (map Some t) (return_ None)
// 'pair' runs 't' and then 'u', and returns both results as a tuple.
// It fails as soon as either parser fails.
let pair t u =
    bind t (fun first ->
        bind u (fun second ->
            return_ (first, second)))
我們已經準備好定義 many 和 sepBy 了。它們更為進階,因為它們會反覆應用輸入的解析器,直到解析器失敗為止;然後 many 和 sepBy 返回聚合後的結果:
// 'many' applies parser 't' repeatedly until it fails and returns all
// successful results as a list, in input order. It always succeeds; zero
// matches yield the empty list.
// NOTE: if 't' can succeed without advancing the position, 'loop' never
// reaches the 'None' case and therefore never terminates.
let many t =
    let ot = opt t
    // 'vs' accumulates results in reverse order; reversed once at the end
    let rec loop vs =
        parser {
            let! ov = ot
            match ov with
            | Some v -> return! loop (v::vs)
            | None -> return List.rev vs
        }
    loop []
// 'sepBy' parses one or more 't' separated by 'sep'.
// 'sep' must produce a binary function; the parsed values are folded from left
// to right with the function returned by each separator.
// A separator that is not followed by a successful 't' is not consumed.
let sepBy t sep =
    let ots = opt (pair sep t)
    // 'acc' is the value reduced so far
    let rec loop acc =
        parser {
            let! next = ots
            match next with
            | Some (combine, rhs) -> return! loop (combine acc rhs)
            | None -> return acc
        }
    t >>= loop
建立一個簡單的表示式解析器
使用我們建立的工具,我們現在可以為像 1+2*3
這樣的簡單表示式定義解析器
我們從底部開始,為整數 pint
定義一個解析器
// 'pint' parses a non-negative integer: one or more digits folded into an int.
// It fails when there is no digit at the current position.
let pint =
    // Appends one decimal digit to the running value 's'.
    // 'digit' is based on System.Char.IsDigit, which also accepts non-ASCII
    // decimal digits (e.g. Arabic-Indic digits). Subtracting the code of '0'
    // is only correct for ASCII, so the digit's numeric value is used instead.
    let f s v = 10*s + int (System.Char.GetNumericValue v)
    parser {
        let! digits = many digit
        return!
            match digits with
            | [] -> fail
            | vs -> return_ (List.fold f 0 vs)
    }
我們嘗試儘可能多地解析數字,結果是 char list
。如果列表為空,我們 fail
,否則我們將字元摺疊成整數。
在 FSI 中測試 pint
:
> run pint "123";;
val it : int option * int = (Some 123, 3)
另外,我們需要解析用於組合整數值的不同型別的運算子:
// Operator parsers: each one matches a single operator character and yields
// the corresponding binary function as its parse result.
let padd = char '+' >>% (fun lhs rhs -> lhs + rhs)
let psubtract = char '-' >>% (fun lhs rhs -> lhs - rhs)
let pmultiply = char '*' >>% (fun lhs rhs -> lhs * rhs)
let pdivide = char '/' >>% (fun lhs rhs -> lhs / rhs)
let pmodulus = char '%' >>% (fun lhs rhs -> lhs % rhs)
FSI:
> run padd "+";;
val it : (int -> int -> int) option * int = (Some <fun:padd@121-1>, 1)
把它們繫結在一起:
// 'pmullike' parses integers separated by operators with the same precedence
// as multiply ('*', '/', '%') and reduces them left to right
let pmullike = sepBy pint (orElse (orElse pmultiply pdivide) pmodulus)
// 'paddlike' parses 'pmullike' sub-expressions separated by operators with the
// same precedence as add ('+', '-') and reduces them left to right
let paddlike = sepBy pmullike (orElse padd psubtract)
// 'pexpr' parses a full expression: a 'paddlike' expression that must be
// followed by end of input, so trailing unparsed text makes the parse fail
let pexpr =
    bind paddlike (fun v ->
        bind eos (fun _ ->
            return_ v))
在 FSI 中執行它:
> run pexpr "2+123*2-3";;
val it : int option * int = (Some 245, 9)
結論
通過定義 Parser<'T>
,return_
,bind
並確保它們遵守 monadic 定律, 我們構建了一個簡單但強大的 Monadic Parser Combinator 框架。
Monads 和 Parsers 十分契合,因為解析器是在解析器狀態上執行的。Monads 允許我們在隱藏解析器狀態的同時組合解析器,從而減少雜亂並提高可組合性。
我們建立的框架很慢並且不會產生任何錯誤訊息,這是為了保持程式碼簡潔。 FParsec 提供可接受的效能以及出色的錯誤訊息。
然而,僅憑一個例子無法理解 Monads。一個人必須練習 Monads。
以下是一些你可以嘗試自己實作的 Monads 示例,以加深你的理解:
- State Monad - 允許隱藏的環境狀態被隱式攜帶
- Tracer Monad - 允許隱式攜帶跟蹤狀態。State Monad 的變種
- Turtle Monad - 用於建立 Turtle(Logo)程式的 Monad。State Monad 的變種
- Continuation Monad - 一個協程 Monad。這方面的一個例子是 F# 中的 async
最好的學習方法,是在你熟悉的領域中為 Monads 找到一個應用。對我來說,這個領域就是解析器。
完整原始碼:
// A Parser<'T> wraps a function that takes the input string and a position
// and returns an optionally parsed value together with a position.
// 'Some value' means the parse succeeded and the returned position points to the
// character following the parsed value.
// 'None' indicates a parse failure at the returned position.
type Parser<'T> = Parser of (string*int -> 'T option*int)
// Runs the parser 't' on the input string 's', starting at position 0.
// Returns the parser's raw result: (optional value, position).
let run t s =
    match t with
    | Parser parse -> parse (s, 0)
// Constructors for parser results; a result is the pair (optional value, position).
// 'succeedWith' wraps the value 'v' in 'Some' and pairs it with position 'p'.
let succeedWith v p = (Some v, p)
// 'failAt' pairs 'None' with the position 'p' where the failure happened.
let failAt p = (None, p)
// 'satisfy' builds a parser that inspects the character at the current position.
// It succeeds with that character (advancing the position by one) when 'sat'
// accepts it; it fails without advancing at end of input or when 'sat' rejects it.
let satisfy sat : Parser<char> =
    Parser (fun (s, p) ->
        if p >= s.Length then failAt p
        else
            let c = s.[p]
            if sat c then succeedWith c (p + 1) else failAt p)
// 'eos' (end of string) succeeds with unit when the position is at or beyond
// the end of the input, and fails otherwise. It never advances the position.
// Useful when testing whether a parser has consumed the full string.
let eos : Parser<unit> =
    Parser (fun (s, p) ->
        if p >= s.Length then succeedWith () p
        else failAt p)
// Single-character parsers built on top of 'satisfy'
// 'anyChar' accepts whatever character is at the current position
let anyChar = satisfy (fun _ -> true)
// 'char ch' accepts only the character 'ch'
let char ch = satisfy (fun c -> c = ch)
// 'digit' accepts characters for which System.Char.IsDigit is true
let digit = satisfy (fun c -> System.Char.IsDigit c)
// 'letter' accepts characters for which System.Char.IsLetter is true
let letter = satisfy (fun c -> System.Char.IsLetter c)
// 'fail' is a parser that always fails at the current position
let fail<'T> : Parser<'T> = Parser (fun (_, p) -> failAt p)
// 'return_' is a parser that always succeeds with the value 'v'
// without advancing the position
let return_ v = Parser (fun (_, p) -> succeedWith v p)
// 'bind' lets us combine two parsers into a more complex parser.
// It runs 't'; if 't' fails, that failure (and its position) is the result.
// Otherwise the value produced by 't' is passed to 'uf' to obtain the next
// parser, which is then run from the position where 't' stopped.
let bind t uf =
    Parser (fun (s, p) ->
        let (Parser first) = t
        match first (s, p) with
        | None, failPos -> None, failPos
        | Some tv, nextPos ->
            let (Parser second) = uf tv
            second (s, nextPos))
// Computation expression builder for parsers:
//   'let!'    maps to 'bind'
//   'return'  maps to 'return_'
//   'return!' yields the given parser unchanged
type ParserBuilder() =
    member __.Bind (t, uf) = bind t uf
    member __.Return v = return_ v
    member __.ReturnFrom t = t
// 'parser' enables us to combine parsers using the 'parser { ... }' syntax
let parser = ParserBuilder()
// 'orElse' creates a parser that tries 't' first. If 't' succeeds its result is
// returned; otherwise 'u' is run from the original position (any progress made
// by the failed 't' is discarded) and the result of 'u' is returned.
let orElse t u =
    Parser (fun (s, p) ->
        let (Parser first) = t
        match first (s, p) with
        | Some tv, tp -> succeedWith tv tp
        | None, _ ->
            let (Parser alternative) = u
            alternative (s, p))
// '>>=' is the infix form of 'bind': run 't', then the parser produced by 'uf'
let (>>=) t uf = uf |> bind t
// '<|>' is the infix form of 'orElse': try 't', fall back to 'u'
let (<|>) t u = u |> orElse t
// 'map' runs parser 't' and, on success, transforms its value with 'm'
let map m t = bind t (fun v -> return_ (m v))
// '>>!' is 'map' with the parser on the left-hand side
let (>>!) t m = map m t
// '>>%' runs 't' and, on success, replaces its value with the constant 'v'
let (>>%) t v = map (fun _ -> v) t
// 'opt' turns parser 't' into a parser that always succeeds: it yields
// 'Some value' when 't' succeeds, and 'None' (without advancing) when 't' fails
let opt t = orElse (map Some t) (return_ None)
// 'pair' runs 't' and then 'u', and returns both results as a tuple.
// It fails as soon as either parser fails.
let pair t u =
    bind t (fun first ->
        bind u (fun second ->
            return_ (first, second)))
// 'many' applies parser 't' repeatedly until it fails and returns all
// successful results as a list, in input order. It always succeeds; zero
// matches yield the empty list.
// NOTE: if 't' can succeed without advancing the position, 'loop' never
// reaches the 'None' case and therefore never terminates.
let many t =
    let ot = opt t
    // 'vs' accumulates results in reverse order; reversed once at the end
    let rec loop vs =
        parser {
            let! ov = ot
            match ov with
            | Some v -> return! loop (v::vs)
            | None -> return List.rev vs
        }
    loop []
// 'sepBy' parses one or more 't' separated by 'sep'.
// 'sep' must produce a binary function; the parsed values are folded from left
// to right with the function returned by each separator.
// A separator that is not followed by a successful 't' is not consumed.
let sepBy t sep =
    let ots = opt (pair sep t)
    // 'acc' is the value reduced so far
    let rec loop acc =
        parser {
            let! next = ots
            match next with
            | Some (combine, rhs) -> return! loop (combine acc rhs)
            | None -> return acc
        }
    t >>= loop
// A simplistic integer expression parser
// 'pint' parses a non-negative integer: one or more digits folded into an int.
// It fails when there is no digit at the current position.
let pint =
    // Appends one decimal digit to the running value 's'.
    // 'digit' is based on System.Char.IsDigit, which also accepts non-ASCII
    // decimal digits (e.g. Arabic-Indic digits). Subtracting the code of '0'
    // is only correct for ASCII, so the digit's numeric value is used instead.
    let f s v = 10*s + int (System.Char.GetNumericValue v)
    parser {
        let! digits = many digit
        return!
            match digits with
            | [] -> fail
            | vs -> return_ (List.fold f 0 vs)
    }
// Operator parsers: each one matches a single operator character and yields
// the corresponding binary function as its parse result.
let padd = char '+' >>% (fun lhs rhs -> lhs + rhs)
let psubtract = char '-' >>% (fun lhs rhs -> lhs - rhs)
let pmultiply = char '*' >>% (fun lhs rhs -> lhs * rhs)
let pdivide = char '/' >>% (fun lhs rhs -> lhs / rhs)
let pmodulus = char '%' >>% (fun lhs rhs -> lhs % rhs)
// 'pmullike' parses integers separated by operators with the same precedence
// as multiply ('*', '/', '%') and reduces them left to right
let pmullike = sepBy pint (orElse (orElse pmultiply pdivide) pmodulus)
// 'paddlike' parses 'pmullike' sub-expressions separated by operators with the
// same precedence as add ('+', '-') and reduces them left to right
let paddlike = sepBy pmullike (orElse padd psubtract)
// 'pexpr' parses a full expression: a 'paddlike' expression that must be
// followed by end of input, so trailing unparsed text makes the parse fail
let pexpr =
    bind paddlike (fun v ->
        bind eos (fun _ ->
            return_ v))