如何优化这个次优的 Set-Cover 解决方案?

发布于 2024-09-26 23:03:34 字数 3263 浏览 7 评论 0原文

我编写这个程序是为了测试“解决”设置覆盖问题需要多长时间。

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Diagnostics;
using MoreLinq;

namespace SetCover
{
    class Program
    {
        const int maxNumItems = 10000;
        const int numSets = 5000;
        const int maxItemsPerSet = 300;

        static void Main(string[] args)
        {
            var rand = new Random();
            var sets = new List<HashSet<int>>(numSets);
            var cover = new List<HashSet<int>>(numSets);
            var universe = new HashSet<int>();
            HashSet<int> remaining;
            var watch = new Stopwatch();


            Console.Write("Generating sets...");
            for (int i = 0; i < numSets; ++i)
            {
                int numItemsInSet = rand.Next(1, maxItemsPerSet);
                sets.Add(new HashSet<int>());

                for (int j = 0; j < numItemsInSet; ++j)
                {
                    sets[i].Add(rand.Next(maxNumItems));
                }
            }
            Console.WriteLine("Done!");

            Console.Write("Computing universe...");
            foreach (var set in sets)
                foreach (var item in set)
                    universe.Add(item);
            Console.WriteLine("Found {0} items.", universe.Count);

            watch.Start();

            //Console.Write("Removing subsets...");
            //int numSetsRemoved = sets.RemoveAll(subset => sets.Any(superset => subset != superset && subset.IsSubsetOf(superset)));
            //Console.WriteLine("Removed {0} subsets.", numSetsRemoved);


            //Console.Write("Sorting sets...");
            //sets = sets.OrderByDescending(s => s.Count).ToList();
            //Console.WriteLine("{0} elements in largest set.", sets[0].Count);


            Console.WriteLine("Computing cover...");
            remaining = universe.ToHashSet();
            while (remaining.Any())
            {
                Console.Write("  Finding set {0}...", cover.Count + 1);
                var nextSet = sets.MaxBy(s => s.Intersect(remaining).Count());
                remaining.ExceptWith(nextSet);
                cover.Add(nextSet);
                Console.WriteLine("{0} elements remaining.", remaining.Count);
            }
            Console.WriteLine("{0} sets in cover.", cover.Count);

            watch.Stop();

            Console.WriteLine("Computed cover in {0} seconds.", watch.Elapsed.TotalSeconds);

            Console.ReadLine();
        }
    }

    public static class Extensions
    {
        public static HashSet<TValue> Clone<TValue>(this HashSet<TValue> set)
        {
            var tmp = new TValue[set.Count];
            set.CopyTo(tmp, 0);
            return new HashSet<TValue>(tmp);
        }

        public static HashSet<TSource> ToHashSet<TSource>(this IEnumerable<TSource> source)
        {
            return new HashSet<TSource>(source);
        }
    }
}

这只是一个贪心的次优解,但运行起来仍然需要 147 秒。然而,我认为这个解决方案应该非常接近最佳,因此对于我的目的来说它应该足够好。我怎样才能加快速度呢?

我注释掉了几行,因为它们弊大于利。 编辑:计算宇宙实际上不应该与可以提前知道的时间分开。

I wrote this program to test how long it would take to "solve" the set-cover problem.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Diagnostics;
using MoreLinq;

namespace SetCover
{
    class Program
    {
        const int maxNumItems = 10000;
        const int numSets = 5000;
        const int maxItemsPerSet = 300;

        static void Main(string[] args)
        {
            var rand = new Random();
            var sets = new List<HashSet<int>>(numSets);
            var cover = new List<HashSet<int>>(numSets);
            var universe = new HashSet<int>();
            HashSet<int> remaining;
            var watch = new Stopwatch();


            Console.Write("Generating sets...");
            for (int i = 0; i < numSets; ++i)
            {
                int numItemsInSet = rand.Next(1, maxItemsPerSet);
                sets.Add(new HashSet<int>());

                for (int j = 0; j < numItemsInSet; ++j)
                {
                    sets[i].Add(rand.Next(maxNumItems));
                }
            }
            Console.WriteLine("Done!");

            Console.Write("Computing universe...");
            foreach (var set in sets)
                foreach (var item in set)
                    universe.Add(item);
            Console.WriteLine("Found {0} items.", universe.Count);

            watch.Start();

            //Console.Write("Removing subsets...");
            //int numSetsRemoved = sets.RemoveAll(subset => sets.Any(superset => subset != superset && subset.IsSubsetOf(superset)));
            //Console.WriteLine("Removed {0} subsets.", numSetsRemoved);


            //Console.Write("Sorting sets...");
            //sets = sets.OrderByDescending(s => s.Count).ToList();
            //Console.WriteLine("{0} elements in largest set.", sets[0].Count);


            Console.WriteLine("Computing cover...");
            remaining = universe.ToHashSet();
            while (remaining.Any())
            {
                Console.Write("  Finding set {0}...", cover.Count + 1);
                var nextSet = sets.MaxBy(s => s.Intersect(remaining).Count());
                remaining.ExceptWith(nextSet);
                cover.Add(nextSet);
                Console.WriteLine("{0} elements remaining.", remaining.Count);
            }
            Console.WriteLine("{0} sets in cover.", cover.Count);

            watch.Stop();

            Console.WriteLine("Computed cover in {0} seconds.", watch.Elapsed.TotalSeconds);

            Console.ReadLine();
        }
    }

    public static class Extensions
    {
        public static HashSet<TValue> Clone<TValue>(this HashSet<TValue> set)
        {
            var tmp = new TValue[set.Count];
            set.CopyTo(tmp, 0);
            return new HashSet<TValue>(tmp);
        }

        public static HashSet<TSource> ToHashSet<TSource>(this IEnumerable<TSource> source)
        {
            return new HashSet<TSource>(source);
        }
    }
}

This is just a greedy sub-optimal solution, but it still took 147 seconds to run. I think however, that this solution should be pretty close to optimal, so it should be good enough for my purposes. How can I speed it up though?

I commented out a few lines because they do more harm than good. Edit: Computing the universe should actually not be apart of the timing... that can be known beforehand.

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1

待天淡蓝洁白时 2024-10-03 23:03:34

我还没有深入研究您的代码/算法的细节,但我将使用一些理论来为您提供建议。正如亨克评论的那样,为了执行“良好”的基准测试,您必须删除所有不需要的代码,并在发布模式下通过全面优化并从命令行运行程序。

然后,请记住您正在运行托管代码:C#(和 Java)是为了互操作性而设计的,而不是为了性能,尽管它们仍然都是很好的平台。如果您需要性能,您应该尝试用 C++ 重新实现代码,或者,如果您愿意,请尝试将 Mono 与 AOT(提前编译器)一起使用:它会大大提高性能

 mono --aot=full YourProgram.exe

现在更多关于基准测试和最优性:您有吗将你的结果与其他人进行比较?您是否在相同的硬件上运行了其他集合覆盖算法,或者您可以将您的硬件与运行相同算法的其他硬件进行比较吗?

而且......您的解决方案与最佳解决方案有多接近?您能[自己]提供一个估计吗?关键在于 LINQ,我讨厌它,因为为了代码的简单性你失去了对代码的控制。 LINQ 的复杂性是多少?如果每个 LINQ 都是 O(n),那么您的算法就是 O(n^3),但我可能建议您替换

remaining.Any()

remaining.Count > 0

以获得一定程度的复杂性。

我的只是建议,希望对你有帮助

I haven't gone deeply into the detail of your code/algorithm, but I'm gonna use some theory to advice you. As henk commented, in order to perform a "good" benchmark you MUST remove all unneeded code and run your program in Release mode with full optimization and from commandline.

Then, remember that you are running managed code: C# (and Java) are designed for interoperability, not for performance, while they are still both good platforms. You should try either to reimplement your code in C++ if you need performance, or, if you wish, try to use Mono with AOT (ahead-of-time compiler): it bursts performance a lot

 mono --aot=full YourProgram.exe

Now more about benchmarks and optimality: have you compared your results with others? Did you run other set-cover algorithms on your same hardware, or can you compare your hardware to others that ran the same algorithm?

And... how close is your solution to optimal? Can you provide [yourself] an estimate? The key is in LINQ, which I hate because you lose control of your code for simplicity of code. What's the complexity of a LINQ? If each LINQ is O(n), your algorithm is O(n^3) but I might suggest you to replace

remaining.Any()

with

remaining.Count > 0

to gain a magnitude of complexity.

Mine are just advices, hope to have been of help

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文