F# 库是否包含优先级队列?其他人可以指出我在 F# 中的优先级队列的实现吗?
8 回答
查看http://lepensemoi.free.fr/index.php/tag/data-structure,了解各种数据结构的一大堆 F# 实现。
令人惊讶的是,在过去的七年中,接受的答案几乎仍然适用于 F# 的所有更改,除了不再有 Pervasives.compare 函数并且“比较”函数现在已合并到基本运算符中Microsoft.FSharp.Core.Operators.compare。
也就是说,该引用的博客条目将二项式堆实现为通用堆,而不是优先级队列的特定要求,即优先级不需要泛型类型,它可以是整数类型以提高比较效率,并且它谈到但没有实现额外的改进,以将最小值保留为单独的字段,以提高仅检查队列中最高优先级项目的效率。
下面的模块代码实现了从该代码派生的二项式堆优先级队列,具有更高的效率,它不使用通用比较进行优先级比较,并且更有效的 O(1) 方法来检查队列的顶部(尽管在插入和删除条目的更多开销成本,尽管它们仍然是 O(log n) - n 是队列中的条目数)。此代码更适合优先级队列的通常应用,其中队列顶部的读取频率高于插入和/或顶部项目删除的执行频率。请注意,当删除顶部元素并将其重新插入队列的下方时,它不如 MinHeap 有效,因为必须以更多的计算开销执行完整的“deleteMin”和“insert”。
[<RequireQualifiedAccess>]
module BinomialHeapPQ =
// type 'a treeElement = Element of uint32 * 'a
type 'a treeElement = class val k:uint32 val v:'a new(k,v) = { k=k;v=v } end
type 'a tree = Node of uint32 * 'a treeElement * 'a tree list
type 'a heap = 'a tree list
type 'a outerheap = | HeapEmpty | HeapNotEmpty of 'a treeElement * 'a heap
let empty = HeapEmpty
let isEmpty = function | HeapEmpty -> true | _ -> false
let inline private rank (Node(r,_,_)) = r
let inline private root (Node(_,x,_)) = x
exception Empty_Heap
let getMin = function | HeapEmpty -> None
| HeapNotEmpty(min,_) -> Some min
let rec private findMin heap =
match heap with | [] -> raise Empty_Heap //guarded so should never happen
| [node] -> root node,[]
| topnode::heap' ->
let min,subheap = findMin heap' in let rtn = root topnode
match subheap with
| [] -> if rtn.k > min.k then min,[] else rtn,[]
| minnode::heap'' ->
let rmn = root minnode
if rtn.k <= rmn.k then rtn,heap
else rmn,minnode::topnode::heap''
let private mergeTree (Node(r,kv1,ts1) as tree1) (Node (_,kv2,ts2) as tree2) =
if kv1.k > kv2.k then Node(r+1u,kv2,tree1::ts2)
else Node(r+1u,kv1,tree2::ts1)
let rec private insTree (newnode: 'a tree) heap =
match heap with
| [] -> [newnode]
| topnode::heap' -> if (rank newnode) < (rank topnode) then newnode::heap
else insTree (mergeTree newnode topnode) heap'
let insert k v = let kv = treeElement(k,v) in let nn = Node(0u,kv,[])
function | HeapEmpty -> HeapNotEmpty(kv,[nn])
| HeapNotEmpty(min,heap) -> let nmin = if k > min.k then min else kv
HeapNotEmpty(nmin,insTree nn heap)
let rec private merge' heap1 heap2 = //doesn't guaranty minimum tree node as head!!!
match heap1,heap2 with
| _,[] -> heap1
| [],_ -> heap2
| topheap1::heap1',topheap2::heap2' ->
match compare (rank topheap1) (rank topheap2) with
| -1 -> topheap1::merge' heap1' heap2
| 1 -> topheap2::merge' heap1 heap2'
| _ -> insTree (mergeTree topheap1 topheap2) (merge' heap1' heap2')
let merge oheap1 oheap2 = match oheap1,oheap2 with
| _,HeapEmpty -> oheap1
| HeapEmpty,_ -> oheap2
| HeapNotEmpty(min1,heap1),HeapNotEmpty(min2,heap2) ->
let min = if min1.k > min2.k then min2 else min1
HeapNotEmpty(min,merge' heap1 heap2)
let rec private removeMinTree = function
| [] -> raise Empty_Heap // will never happen as already guarded
| [node] -> node,[]
| t::ts -> let t',ts' = removeMinTree ts
if (root t).k <= (root t').k then t,ts else t',t::ts'
let deleteMin =
function | HeapEmpty -> HeapEmpty
| HeapNotEmpty(_,heap) ->
match heap with
| [] -> HeapEmpty // should never occur: non empty heap with no elements
| [Node(_,_,heap')] -> match heap' with
| [] -> HeapEmpty
| _ -> let min,_ = findMin heap'
HeapNotEmpty(min,heap')
| _::_ -> let Node(_,_,ts1),ts2 = removeMinTree heap
let nheap = merge' (List.rev ts1) ts2 in let min,_ = findMin nheap
HeapNotEmpty(min,nheap)
let reinsertMinAs k v pq = insert k v (deleteMin pq)
请注意,有两个“treeElement”类型的选项,以适应测试的方式。在我的回答中提到的关于使用优先级队列筛选素数的应用程序中,上述代码比 MinHeap 的功能实现慢约 80%(非多处理模式,因为上述二项式堆不适合- 位置调整);这是因为二项式堆的“删除后插入”操作的额外计算复杂性,而不是为 MinHeap 实现有效组合这些操作的能力。
因此,MinHeap Priority Queue 更适合这种类型的应用程序以及需要有效的就地调整的情况,而 Binomial Heap Priority Queue 更适合需要能够有效地将两个队列合并为一个的情况。
FSharpx.Collections 包括一个功能性堆集合https://github.com/fsharp/fsharpx/blob/master/src/FSharpx.Core/Collections/Heap.fsi以及它的 PriortityQueue 接口https://github.com /fsharp/fsharpx/blob/master/src/FSharpx.Core/Collections/PriorityQueue.fs
已编辑:更正纯函数版本的 deleteMin 函数中的错误并添加 ofSeq 函数。
我在关于 F# prime sieves 的回答中实现了基于 MinHeap Binary Heap 的优先级队列的两个版本,第一个是纯功能代码(较慢),第二个是基于数组(ResizeArray,它建立在 DotNet 列表上,内部使用用于存储列表的数组)。非功能版本在某种程度上是合理的,因为 MinHeap 通常在 400 多年前由Michael Eytzinger发明的基于家谱树的模型之后实现为可变数组二进制堆。
在那个答案中,我没有实现“从队列中删除最高优先级项目”功能,因为算法不需要它,但我确实实现了一个“重新插入队列中最下方的项目”功能,因为算法确实需要它,而且功能与“deleteMin”功能所需的功能非常相似;不同之处在于,不是用新参数重新插入顶部的“最小”项目,而是从队列中删除最后一个项目(以与插入新项目时类似的方式找到,但更简单),然后重新插入该项目以替换顶部队列中的(最小)项目(只需调用“reinsertMinAt”函数)。我还实现了一个“调整”函数,该函数将一个函数应用于所有队列元素,然后重新堆积最终结果以提高效率,
在下面的代码中,我实现了上述“deleteMin”函数以及“ofSeq”函数,该函数可用于从使用内部“reheapify”函数的优先级/内容元组对元素序列构建新队列为了效率。
通过将大于符号更改为小于符号,可以轻松地将根据此代码的 MinHeap 更改为“MaxHeap”,反之亦然,在与优先级“k”值相关的比较中。Min/Max Heap 支持相同无符号整数“Key”优先级的多个元素,但不保留具有相同优先级的条目的顺序;换句话说,如果有其他具有与我不需要的优先级相同的条目并且当前代码更有效,则无法保证进入队列的第一个元素将是弹出到最小位置的第一个元素. 如果需要,可以修改代码以保留顺序(继续向下移动新插入,直到过去任何具有相同优先级的条目)。
Min/Max Heap Priority Queue 的优点是与其他类型的非简单队列相比,它具有较少的计算复杂性开销,在 O(1) 时间内产生 Min 或 Max(取决于 MinHeap 还是 MaxHeap 实现),插入和删除的最坏情况是 O(log n) 时间,而调整和构建只需要 O(n) 时间,其中“n”是当前队列中的元素数。“resinsertMinAs”函数优于删除然后插入的优点是,它将最坏情况的时间从两倍减少到 O(log n),并且通常比重新插入通常在队列开头附近更好,因此不需要完全扫描。
与带有指向最小值的指针的附加选项以产生 O(1) 查找最小值性能的二项式堆相比,MinHeap 可能稍微简单一些,因此在执行相同的工作时更快,尤其是在不需要的情况下二项式堆提供的“合并堆”功能。与使用 MinHeap 相比,使用二项式堆“合并”函数“重新插入 MinAs”可能需要更长的时间,因为看起来通常需要进行更多的比较。
MinHeap Priority Queue 特别适用于 Eratosthenes 的增量筛的问题,就像在另一个链接的答案中一样,并且可能是 Melissa E. O'Neill在她的论文中使用的队列,表明特纳素筛是无论是算法还是性能,都不是埃拉托色尼筛法。
以下纯函数代码将“deleteMin”和“ofSeq”函数添加到该代码:
[<RequireQualifiedAccess>]
module MinHeap =
type MinHeapTreeEntry<'T> = class val k:uint32 val v:'T new(k,v) = { k=k;v=v } end
[<CompilationRepresentation(CompilationRepresentationFlags.UseNullAsTrueValue)>]
[<NoEquality; NoComparison>]
type MinHeapTree<'T> =
| HeapEmpty
| HeapOne of MinHeapTreeEntry<'T>
| HeapNode of MinHeapTreeEntry<'T> * MinHeapTree<'T> * MinHeapTree<'T> * uint32
let empty = HeapEmpty
let getMin pq = match pq with | HeapOne(kv) | HeapNode(kv,_,_,_) -> Some kv | _ -> None
let insert k v pq =
let kv = MinHeapTreeEntry(k,v)
let rec insert' kv msk pq =
match pq with
| HeapEmpty -> HeapOne kv
| HeapOne kvn -> if k < kvn.k then HeapNode(kv,pq,HeapEmpty,2u)
else HeapNode(kvn,HeapOne kv,HeapEmpty,2u)
| HeapNode(kvn,l,r,cnt) ->
let nc = cnt + 1u
let nmsk = if msk <> 0u then msk <<< 1 else
let s = int32 (System.Math.Log (float nc) / System.Math.Log(2.0))
(nc <<< (32 - s)) ||| 1u //never ever zero again with the or'ed 1
if k <= kvn.k then if (nmsk &&& 0x80000000u) = 0u then HeapNode(kv,insert' kvn nmsk l,r,nc)
else HeapNode(kv,l,insert' kvn nmsk r,nc)
else if (nmsk &&& 0x80000000u) = 0u then HeapNode(kvn,insert' kv nmsk l,r,nc)
else HeapNode(kvn,l,insert' kv nmsk r,nc)
insert' kv 0u pq
let private reheapify kv k pq =
let rec reheapify' pq =
match pq with
| HeapEmpty | HeapOne _ -> HeapOne kv
| HeapNode(kvn,l,r,cnt) ->
match r with
| HeapOne kvr when k > kvr.k ->
match l with //never HeapEmpty
| HeapOne kvl when k > kvl.k -> //both qualify, choose least
if kvl.k > kvr.k then HeapNode(kvr,l,HeapOne kv,cnt)
else HeapNode(kvl,HeapOne kv,r,cnt)
| HeapNode(kvl,_,_,_) when k > kvl.k -> //both qualify, choose least
if kvl.k > kvr.k then HeapNode(kvr,l,HeapOne kv,cnt)
else HeapNode(kvl,reheapify' l,r,cnt)
| _ -> HeapNode(kvr,l,HeapOne kv,cnt) //only right qualifies
| HeapNode(kvr,_,_,_) when k > kvr.k -> //need adjusting for left leaf or else left leaf
match l with //never HeapEmpty or HeapOne
| HeapNode(kvl,_,_,_) when k > kvl.k -> //both qualify, choose least
if kvl.k > kvr.k then HeapNode(kvr,l,reheapify' r,cnt)
else HeapNode(kvl,reheapify' l,r,cnt)
| _ -> HeapNode(kvr,l,reheapify' r,cnt) //only right qualifies
| _ -> match l with //r could be HeapEmpty but l never HeapEmpty
| HeapOne(kvl) when k > kvl.k -> HeapNode(kvl,HeapOne kv,r,cnt)
| HeapNode(kvl,_,_,_) when k > kvl.k -> HeapNode(kvl,reheapify' l,r,cnt)
| _ -> HeapNode(kv,l,r,cnt) //just replace the contents of pq node with sub leaves the same
reheapify' pq
let reinsertMinAs k v pq =
let kv = MinHeapTreeEntry(k,v)
reheapify kv k pq
let deleteMin pq =
let rec delete' kv msk pq =
match pq with
| HeapEmpty -> kv,empty //should never get here as should flock off up before an empty is reached
| HeapOne kvn -> kvn,empty
| HeapNode(kvn,l,r,cnt) ->
let nmsk = if msk <> 0u then msk <<< 1 else
let s = int32 (System.Math.Log (float cnt) / System.Math.Log(2.0))
(cnt <<< (32 - s)) ||| 1u //never ever zero again with the or'ed 1
if (nmsk &&& 0x80000000u) = 0u then let kvl,pql = delete' kvn nmsk l
match pql with
| HeapEmpty -> kvl,HeapOne kvn
| HeapOne _ | HeapNode _ -> kvl,HeapNode(kvn,pql,r,cnt - 1u)
else let kvr,pqr = delete' kvn nmsk r
kvr,HeapNode(kvn,l,pqr,cnt - 1u)
match pq with
| HeapEmpty | HeapOne _ -> empty //for the case of deleting from queue either empty or one entry
| HeapNode(kv,_,_,cnt) -> let nkv,npq = delete' kv 0u pq in reinsertMinAs nkv.k nkv.v npq
let adjust f (pq:MinHeapTree<_>) = //adjust all the contents using the function, then rebuild by reheapify
let rec adjust' pq =
match pq with
| HeapEmpty -> pq
| HeapOne kv -> HeapOne(MinHeapTreeEntry(f kv.k kv.v))
| HeapNode (kv,l,r,cnt) -> let nkv = MinHeapTreeEntry(f kv.k kv.v)
reheapify nkv nkv.k (HeapNode(kv,adjust' l,adjust' r,cnt))
adjust' pq
let ofSeq (sq:seq<MinHeapTreeEntry<_>>) =
let cnt = sq |> Seq.length |> uint32 in let hcnt = cnt / 2u in let nmrtr = sq.GetEnumerator()
let rec build' i =
if nmrtr.MoveNext() && i <= cnt then
if i > hcnt then HeapOne(nmrtr.Current)
else let i2 = i + i in HeapNode(nmrtr.Current,build' i2,build' (i2 + 1u),cnt - i)
else HeapEmpty
build' 1u
以下代码将 deleteMin 和 ofSeq 函数添加到基于数组的版本中:
[<RequireQualifiedAccess>]
module MinHeap =
type MinHeapTreeEntry<'T> = class val k:uint32 val v:'T new(k,v) = { k=k;v=v } end
type MinHeapTree<'T> = ResizeArray<MinHeapTreeEntry<'T>>
let empty<'T> = MinHeapTree<MinHeapTreeEntry<'T>>()
let getMin (pq:MinHeapTree<_>) = if pq.Count > 0 then Some pq.[0] else None
let insert k v (pq:MinHeapTree<_>) =
if pq.Count = 0 then pq.Add(MinHeapTreeEntry(0xFFFFFFFFu,v)) //add an extra entry so there's always a right max node
let mutable nxtlvl = pq.Count in let mutable lvl = nxtlvl <<< 1 //1 past index of value added times 2
pq.Add(pq.[nxtlvl - 1]) //copy bottom entry then do bubble up while less than next level up
while ((lvl <- lvl >>> 1); nxtlvl <- nxtlvl >>> 1; nxtlvl <> 0) do
let t = pq.[nxtlvl - 1] in if t.k > k then pq.[lvl - 1] <- t else lvl <- lvl <<< 1; nxtlvl <- 0 //causes loop break
pq.[lvl - 1] <- MinHeapTreeEntry(k,v); pq
let reinsertMinAs k v (pq:MinHeapTree<_>) = //do minify down for value to insert
let mutable nxtlvl = 1 in let mutable lvl = nxtlvl in let cnt = pq.Count
while (nxtlvl <- nxtlvl <<< 1; nxtlvl < cnt) do
let lk = pq.[nxtlvl - 1].k in let rk = pq.[nxtlvl].k in let oldlvl = lvl
let k = if k > lk then lvl <- nxtlvl; lk else k in if k > rk then nxtlvl <- nxtlvl + 1; lvl <- nxtlvl
if lvl <> oldlvl then pq.[oldlvl - 1] <- pq.[lvl - 1] else nxtlvl <- cnt //causes loop break
pq.[lvl - 1] <- MinHeapTreeEntry(k,v); pq
let deleteMin (pq:MinHeapTree<_>) =
if pq.Count <= 2 then empty else //if contains one or less entries, return empty queue
let btmi = pq.Count - 2 in let btm = pq.[btmi] in pq.RemoveAt btmi
reinsertMinAs btm.k btm.v pq
let adjust f (pq:MinHeapTree<_>) = //adjust all the contents using the function, then re-heapify
if pq <> null then
let cnt = pq.Count
if cnt > 1 then
for i = 0 to cnt - 2 do //change contents using function
let e = pq.[i] in let k,v = e.k,e.v in pq.[i] <- MinHeapTreeEntry (f k v)
for i = cnt/2 downto 1 do //rebuild by reheapify
let kv = pq.[i - 1] in let k = kv.k
let mutable nxtlvl = i in let mutable lvl = nxtlvl
while (nxtlvl <- nxtlvl <<< 1; nxtlvl < cnt) do
let lk = pq.[nxtlvl - 1].k in let rk = pq.[nxtlvl].k in let oldlvl = lvl
let k = if k > lk then lvl <- nxtlvl; lk else k in if k > rk then nxtlvl <- nxtlvl + 1; lvl <- nxtlvl
if lvl <> oldlvl then pq.[oldlvl - 1] <- pq.[lvl - 1] else nxtlvl <- cnt //causes loop break
pq.[lvl - 1] <- kv
pq
The Monad.Reader的第 16期中讨论了优先级队列的功能数据结构,这很有趣。
它包括对快速且非常容易实现的配对堆的描述。
只需使用具有唯一 int 的 F#Set
元素类型对(以允许重复)并使用set.MinElement
or提取元素set.MaxElement
。所有相关操作都是 O(log n) 时间复杂度。如果您确实需要 O(1) 重复访问最小元素,您可以简单地缓存它,如果找到新的最小元素,则在插入时更新缓存。
您可以尝试多种堆数据结构(倾斜堆、展开堆、配对堆、二项式堆、倾斜二项式堆、上述的自举变体)。有关它们的设计、实现和实际性能的详细分析,请参阅The F#.NET Journal中的文章数据结构:堆。
使用 F#,您可以使用任何 .NET 库,因此如果您可以使用不是用 F# 编写的实现,我将Wintellect Power Collection Library。
这里有一个二项式堆的实现,它是实现优先级队列的常用数据结构。