如何仅保留集合的定义子集

发布于 2024-12-08 16:57:10 字数 3346 浏览 0 评论 0原文

我只想保留集合的定义子集。我没有找到任何相关信息。这很难解释,所以我举了一个例子:

假设我有这个集合:

// Sample documents for the example: each one has a numeric "category" and a "score".
db.mycollection.save({ "category" : 1201, "score" : 0.5});
db.mycollection.save({ "category" : 1201, "score" : 0.4});
db.mycollection.save({ "category" : 1201, "score" : 0.3});
db.mycollection.save({ "category" : 1201, "score" : 0.5});
db.mycollection.save({ "category" : 1201, "score" : 0.1});

db.mycollection.save({ "category" : 1202, "score" : 0.5});
db.mycollection.save({ "category" : 1202, "score" : 0.6});
db.mycollection.save({ "category" : 1202, "score" : 0.1});
db.mycollection.save({ "category" : 1202, "score" : 0.3});
db.mycollection.save({ "category" : 1202, "score" : 0.1});
db.mycollection.save({ "category" : 1202, "score" : 0.4});
db.mycollection.save({ "category" : 1202, "score" : 0.3});

db.mycollection.save({ "category" : 1203, "score" : 0.8});
db.mycollection.save({ "category" : 1203, "score" : 0.4});
db.mycollection.save({ "category" : 1203, "score" : 0.7});
db.mycollection.save({ "category" : 1203, "score" : 0.3});

db.mycollection.save({ "category" : 1204, "score" : 0.2});
db.mycollection.save({ "category" : 1204, "score" : 0.8});
db.mycollection.save({ "category" : 1204, "score" : 0.7});
db.mycollection.save({ "category" : 1204, "score" : 0.9});

我的目标是获取每个类别中分数最高的 3 行。 在这个例子中,我希望得到这样的结果:

{ "category" : 1201, "score" : 0.5 }
{ "category" : 1201, "score" : 0.5 }
{ "category" : 1201, "score" : 0.4 }
{ "category" : 1202, "score" : 0.6 }
{ "category" : 1202, "score" : 0.5 }
{ "category" : 1202, "score" : 0.4 }
{ "category" : 1203, "score" : 0.8 }
{ "category" : 1203, "score" : 0.7 }
{ "category" : 1203, "score" : 0.4 }
{ "category" : 1204, "score" : 0.9 }
{ "category" : 1204, "score" : 0.8 }
{ "category" : 1204, "score" : 0.7 }

但我真的不知道该怎么做。 我找到了一种使用 map-reduce 函数的变通方法,但它真的很慢。 这就是我所做的:

// Map step: emits the score of the object bound to `this`
// (the document being mapped), keyed by its category.
var map = function ()
{
    var doc = this;
    emit(doc.category, doc.score);
};

/**
 * Reduce step: keeps the three highest scores emitted for one key.
 *
 * NOTE(review): map emits plain numbers while this returns an object. If the
 * engine feeds a previous reduce result back into reduce, those objects would
 * reach the numeric comparator below - confirm against the MongoDB mapReduce
 * requirements for the reduce function.
 *
 * @param {*} key - Key being reduced (the category emitted by map); not used.
 * @param {number[]} values - Scores emitted for that key.
 * @returns {{scores: number[]}} At most three scores, highest first.
 */
var reduce = function(key, values)
{
    // Work on a copy so the in-place sort does not reorder the caller's array.
    var total = values.slice();

    // sort() without a comparator compares elements as strings
    // (10 would sort before 9), so order numerically, highest first.
    total.sort(function (a, b) { return b - a; });

    return {scores: total.slice(0, 3)};
};

// Run the map/reduce job over mycollection; the `out` option names the collection that receives the results.
db.mycollection.mapReduce(map, reduce, { out : "myoutput" } );
// Query the output collection to display the result for each key.
db.myoutput.find();
// Drop the output collection once the results have been read.
db.myoutput.drop();

结果是:

{ "_id" : 1201, "value" : { "scores" : [ 0.5, 0.5, 0.4 ] } }
{ "_id" : 1202, "value" : { "scores" : [ 0.6, 0.5, 0.4 ] } }
{ "_id" : 1203, "value" : { "scores" : [ 0.8, 0.7, 0.4 ] } }
{ "_id" : 1204, "value" : { "scores" : [ 0.9, 0.8, 0.7 ] } }

这并不完全是我想要的,但它完成了工作。

我的问题是:不使用map-reduce 可以做到这一点吗? (或者表现良好?)

PS:请原谅我糟糕的英语。我不太流利。


编辑:

我终于找到了这个解决方案:

// Map step: emits the score of the object bound to `this`
// (the document being mapped), keyed by its category.
var map = function ()
{
    var doc = this;
    emit(doc.category, doc.score);
};

/**
 * Reduce step: keeps the three highest scores seen for one key.
 *
 * Accepts raw scores as emitted by map, arrays of scores, and
 * {scores: ...} objects shaped like this function's own return value,
 * so a previous result can be passed back in as one of the values.
 *
 * @param {*} key - Key being reduced (the category emitted by map); not used.
 * @param {Array} values - Numbers, arrays of numbers, or {scores: ...} objects.
 * @returns {{scores: number[]}} At most three scores, highest first.
 */
var reduce = function(key, values)
{
    var total = [];
    values.forEach(function(value)
    {
        if (value instanceof Array)
        {
            // concat() returns a new array and leaves `total` untouched,
            // so its result must be assigned back.
            total = total.concat(value);
        }
        else if (value instanceof Object)
        {
            if (value.scores instanceof Array)
            {
                total = total.concat(value.scores);
            }
            else if (value.scores !== undefined)
            {
                // A lone score stored directly under `scores`.
                total.push(value.scores);
            }
        }
        else
        {
            total.push(value);
        }
    });

    // Numeric comparator: highest score first.
    total.sort(function (a, b) { return b - a; });

    return {scores: total.slice(0, 3)};
};

I would like to keep only a defined subset of a collection. I couldn't find any relevant information about it. It's hard to explain, so here is an example:

Let's say I have this collection:

// Sample documents for the example: each one has a numeric "category" and a "score".
db.mycollection.save({ "category" : 1201, "score" : 0.5});
db.mycollection.save({ "category" : 1201, "score" : 0.4});
db.mycollection.save({ "category" : 1201, "score" : 0.3});
db.mycollection.save({ "category" : 1201, "score" : 0.5});
db.mycollection.save({ "category" : 1201, "score" : 0.1});

db.mycollection.save({ "category" : 1202, "score" : 0.5});
db.mycollection.save({ "category" : 1202, "score" : 0.6});
db.mycollection.save({ "category" : 1202, "score" : 0.1});
db.mycollection.save({ "category" : 1202, "score" : 0.3});
db.mycollection.save({ "category" : 1202, "score" : 0.1});
db.mycollection.save({ "category" : 1202, "score" : 0.4});
db.mycollection.save({ "category" : 1202, "score" : 0.3});

db.mycollection.save({ "category" : 1203, "score" : 0.8});
db.mycollection.save({ "category" : 1203, "score" : 0.4});
db.mycollection.save({ "category" : 1203, "score" : 0.7});
db.mycollection.save({ "category" : 1203, "score" : 0.3});

db.mycollection.save({ "category" : 1204, "score" : 0.2});
db.mycollection.save({ "category" : 1204, "score" : 0.8});
db.mycollection.save({ "category" : 1204, "score" : 0.7});
db.mycollection.save({ "category" : 1204, "score" : 0.9});

My goal is to get the best 3 rows of all categories (regarding the score).
In this example, I try to get this kind of results:

{ "category" : 1201, "score" : 0.5 }
{ "category" : 1201, "score" : 0.5 }
{ "category" : 1201, "score" : 0.4 }
{ "category" : 1202, "score" : 0.6 }
{ "category" : 1202, "score" : 0.5 }
{ "category" : 1202, "score" : 0.4 }
{ "category" : 1203, "score" : 0.8 }
{ "category" : 1203, "score" : 0.7 }
{ "category" : 1203, "score" : 0.4 }
{ "category" : 1204, "score" : 0.9 }
{ "category" : 1204, "score" : 0.8 }
{ "category" : 1204, "score" : 0.7 }

But I really don't know how to do it.
I found a workaround running a map reduce function, but it's really really slow.
This is what I did:

// Map step: emits the score of the object bound to `this`
// (the document being mapped), keyed by its category.
var map = function ()
{
    var doc = this;
    emit(doc.category, doc.score);
};

/**
 * Reduce step: keeps the three highest scores emitted for one key.
 *
 * NOTE(review): map emits plain numbers while this returns an object. If the
 * engine feeds a previous reduce result back into reduce, those objects would
 * reach the numeric comparator below - confirm against the MongoDB mapReduce
 * requirements for the reduce function.
 *
 * @param {*} key - Key being reduced (the category emitted by map); not used.
 * @param {number[]} values - Scores emitted for that key.
 * @returns {{scores: number[]}} At most three scores, highest first.
 */
var reduce = function(key, values)
{
    // Work on a copy so the in-place sort does not reorder the caller's array.
    var total = values.slice();

    // sort() without a comparator compares elements as strings
    // (10 would sort before 9), so order numerically, highest first.
    total.sort(function (a, b) { return b - a; });

    return {scores: total.slice(0, 3)};
};

// Run the map/reduce job over mycollection; the `out` option names the collection that receives the results.
db.mycollection.mapReduce(map, reduce, { out : "myoutput" } );
// Query the output collection to display the result for each key.
db.myoutput.find();
// Drop the output collection once the results have been read.
db.myoutput.drop();

The result is:

{ "_id" : 1201, "value" : { "scores" : [ 0.5, 0.5, 0.4 ] } }
{ "_id" : 1202, "value" : { "scores" : [ 0.6, 0.5, 0.4 ] } }
{ "_id" : 1203, "value" : { "scores" : [ 0.8, 0.7, 0.4 ] } }
{ "_id" : 1204, "value" : { "scores" : [ 0.9, 0.8, 0.7 ] } }

It's not exactly what I wanted, but it does the job.

My question is: Is it possible to do that without using map-reduce? (Or with good performance?)

PS: Excuse my poor English. I'm not fluent.


EDIT:

I finally came up with this solution:

// Map step: emits the score of the object bound to `this`
// (the document being mapped), keyed by its category.
var map = function ()
{
    var doc = this;
    emit(doc.category, doc.score);
};

/**
 * Reduce step: keeps the three highest scores seen for one key.
 *
 * Accepts raw scores as emitted by map, arrays of scores, and
 * {scores: ...} objects shaped like this function's own return value,
 * so a previous result can be passed back in as one of the values.
 *
 * @param {*} key - Key being reduced (the category emitted by map); not used.
 * @param {Array} values - Numbers, arrays of numbers, or {scores: ...} objects.
 * @returns {{scores: number[]}} At most three scores, highest first.
 */
var reduce = function(key, values)
{
    var total = [];
    values.forEach(function(value)
    {
        if (value instanceof Array)
        {
            // concat() returns a new array and leaves `total` untouched,
            // so its result must be assigned back.
            total = total.concat(value);
        }
        else if (value instanceof Object)
        {
            if (value.scores instanceof Array)
            {
                total = total.concat(value.scores);
            }
            else if (value.scores !== undefined)
            {
                // A lone score stored directly under `scores`.
                total.push(value.scores);
            }
        }
        else
        {
            total.push(value);
        }
    });

    // Numeric comparator: highest score first.
    total.sort(function (a, b) { return b - a; });

    return {scores: total.slice(0, 3)};
};

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1)

一身骄傲 2024-12-15 16:57:11

您可以很容易地获得给定类别的结果,

// Three highest scores for one category: sort by score descending, then keep the first 3 documents.
db.mycollection.find({category : 1204}).sort({score : -1}).limit(3)

这将为给定类别提供 3 个最佳分数,

然后您可以对类别进行循环,但这将需要大量请求(每个类别一个)。

map-reduce 方案是执行此操作的唯一方法,并且您似乎已经有了一个可行的解决方案。
如果你想提高性能,请研究reduce函数,特别是以下部分,这不是很好:

// Copies every entry of `values` into `total`, one element at a time.
values.forEach(function(value)
{
    total.push(value);
});
// sort() without a comparator orders elements by their string representation, not numerically.
total.sort();
// A second full pass just to flip the order so the last-sorted entries come first.
total.reverse();
// splice(0, 3) removes the first three elements and returns them; that returned array becomes `total`.
total = total.splice(0, 3);

You can very easily get the result for a given category:

// Three highest scores for one category: sort by score descending, then keep the first 3 documents.
db.mycollection.find({category : 1204}).sort({score : -1}).limit(3)

this will give the 3 best scores for a given category

you can then do a loop of the categories but this will require a lot of requests (one per category).

The map reduce solution is the only way to do this and you seem to have a working solution.
If you want to improve performance, work on the reduce function, especially the following part, which is not very good:

// Copies every entry of `values` into `total`, one element at a time.
values.forEach(function(value)
{
    total.push(value);
});
// sort() without a comparator orders elements by their string representation, not numerically.
total.sort();
// A second full pass just to flip the order so the last-sorted entries come first.
total.reverse();
// splice(0, 3) removes the first three elements and returns them; that returned array becomes `total`.
total = total.splice(0, 3);
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文