如何仅保留集合的定义子集
我只想保留集合的定义子集。我没有找到任何相关信息。这很难解释,所以我举了一个例子:
假设我有这个集合:
// Sample data: 20 documents, each with a numeric "category" (1201-1204) and a numeric "score".
db.mycollection.save({ "category" : 1201, "score" : 0.5});
db.mycollection.save({ "category" : 1201, "score" : 0.4});
db.mycollection.save({ "category" : 1201, "score" : 0.3});
db.mycollection.save({ "category" : 1201, "score" : 0.5});
db.mycollection.save({ "category" : 1201, "score" : 0.1});
db.mycollection.save({ "category" : 1202, "score" : 0.5});
db.mycollection.save({ "category" : 1202, "score" : 0.6});
db.mycollection.save({ "category" : 1202, "score" : 0.1});
db.mycollection.save({ "category" : 1202, "score" : 0.3});
db.mycollection.save({ "category" : 1202, "score" : 0.1});
db.mycollection.save({ "category" : 1202, "score" : 0.4});
db.mycollection.save({ "category" : 1202, "score" : 0.3});
db.mycollection.save({ "category" : 1203, "score" : 0.8});
db.mycollection.save({ "category" : 1203, "score" : 0.4});
db.mycollection.save({ "category" : 1203, "score" : 0.7});
db.mycollection.save({ "category" : 1203, "score" : 0.3});
db.mycollection.save({ "category" : 1204, "score" : 0.2});
db.mycollection.save({ "category" : 1204, "score" : 0.8});
db.mycollection.save({ "category" : 1204, "score" : 0.7});
db.mycollection.save({ "category" : 1204, "score" : 0.9});
我的目标是获得每个类别中分数最高的 3 行。 在这个例子中,我希望得到这样的结果:
{ "category" : 1201, "score" : 0.5 }
{ "category" : 1201, "score" : 0.5 }
{ "category" : 1201, "score" : 0.4 }
{ "category" : 1202, "score" : 0.6 }
{ "category" : 1202, "score" : 0.5 }
{ "category" : 1202, "score" : 0.4 }
{ "category" : 1203, "score" : 0.8 }
{ "category" : 1203, "score" : 0.7 }
{ "category" : 1203, "score" : 0.4 }
{ "category" : 1204, "score" : 0.9 }
{ "category" : 1204, "score" : 0.8 }
{ "category" : 1204, "score" : 0.7 }
但我真的不知道该怎么做。 我找到了一种使用 map-reduce 函数的变通方法,但它真的很慢。 这就是我所做的:
// Map step: reads `category` and `score` from `this` (the document being
// processed by mapReduce) and emits the score under its category key.
var map = function()
{
    var category = this.category;
    var score = this.score;
    emit(category, score);
};
/**
 * Reduce step: keeps the three highest scores emitted for one category.
 *
 * MongoDB may call reduce several times for the same key and pass the result
 * of an earlier call back in as one of the values, so each value is accepted
 * in any of these forms: a plain score, an array of scores, or a previous
 * result object of the form {scores: [...]}.
 *
 * @param key    the category the values were emitted under (unused here)
 * @param values array of scores and/or earlier reduce results for that key
 * @returns {{scores: Array}} at most three scores, highest first
 */
var reduce = function(key, values)
{
    var total = [];
    values.forEach(function(value)
    {
        if (value instanceof Array)
            total = total.concat(value);
        else if (value !== null && typeof value === "object" && value.scores instanceof Array)
            total = total.concat(value.scores);
        else
            total.push(value);
    });
    // A numeric comparator is required: the default sort compares the
    // elements as strings, which misorders numbers such as 9 and 10.
    total.sort(function(a, b) { return b - a; });
    return {scores: total.slice(0, 3)};
};
// Run the map-reduce job with the `map` and `reduce` functions and write the result to the "myoutput" collection.
db.mycollection.mapReduce(map, reduce, { out : "myoutput" } );
// Query the output collection.
db.myoutput.find();
// Drop the output collection afterwards.
db.myoutput.drop();
结果是:
{ "_id" : 1201, "value" : { "scores" : [ 0.5, 0.5, 0.4 ] } }
{ "_id" : 1202, "value" : { "scores" : [ 0.6, 0.5, 0.4 ] } }
{ "_id" : 1203, "value" : { "scores" : [ 0.8, 0.7, 0.4 ] } }
{ "_id" : 1204, "value" : { "scores" : [ 0.9, 0.8, 0.7 ] } }
这并不完全是我想要的,但它完成了工作。
我的问题是:不使用map-reduce 可以做到这一点吗? (或者表现良好?)
PS:请原谅我糟糕的英语。我不太流利。
编辑:
我终于找到了这个解决方案:
// Map step: reads `category` and `score` from `this` (the document being
// processed by mapReduce) and emits the score under its category key.
var map = function()
{
    var category = this.category;
    var score = this.score;
    emit(category, score);
};
/**
 * Reduce step: keeps the three highest scores emitted for one category.
 *
 * Each value may be a plain score, an array of scores, or an object with a
 * `scores` property (the shape this function itself returns), so the output
 * of one call can be passed back in when MongoDB re-reduces the same key.
 *
 * @param key    the category the values were emitted under (unused here)
 * @param values array of scores and/or earlier reduce results for that key
 * @returns {{scores: Array}} at most three scores, highest first
 */
var reduce = function(key, values)
{
    var total = [];
    values.forEach(function(value)
    {
        // concat() does not modify `total`; it returns a new array, so the
        // result has to be assigned back or the merged scores are lost.
        if (value instanceof Array)
            total = total.concat(value);
        else if (value instanceof Object)
        {
            if (value.scores instanceof Array)
                total = total.concat(value.scores);
            else
                total.push(value.scores);
        }
        else
            total.push(value);
    });
    // Sort numerically, highest score first.
    total.sort(function(a, b) { return b - a; });
    return {scores: total.slice(0, 3)};
};
I would like to keep only a defined subset of a collection. I haven't found any relevant information about it. It's hard to explain, so here is an example:
Let's say I have this collection:
// Sample data: 20 documents, each with a numeric "category" (1201-1204) and a numeric "score".
db.mycollection.save({ "category" : 1201, "score" : 0.5});
db.mycollection.save({ "category" : 1201, "score" : 0.4});
db.mycollection.save({ "category" : 1201, "score" : 0.3});
db.mycollection.save({ "category" : 1201, "score" : 0.5});
db.mycollection.save({ "category" : 1201, "score" : 0.1});
db.mycollection.save({ "category" : 1202, "score" : 0.5});
db.mycollection.save({ "category" : 1202, "score" : 0.6});
db.mycollection.save({ "category" : 1202, "score" : 0.1});
db.mycollection.save({ "category" : 1202, "score" : 0.3});
db.mycollection.save({ "category" : 1202, "score" : 0.1});
db.mycollection.save({ "category" : 1202, "score" : 0.4});
db.mycollection.save({ "category" : 1202, "score" : 0.3});
db.mycollection.save({ "category" : 1203, "score" : 0.8});
db.mycollection.save({ "category" : 1203, "score" : 0.4});
db.mycollection.save({ "category" : 1203, "score" : 0.7});
db.mycollection.save({ "category" : 1203, "score" : 0.3});
db.mycollection.save({ "category" : 1204, "score" : 0.2});
db.mycollection.save({ "category" : 1204, "score" : 0.8});
db.mycollection.save({ "category" : 1204, "score" : 0.7});
db.mycollection.save({ "category" : 1204, "score" : 0.9});
My goal is to get the 3 best rows (by score) of each category.
In this example, I am trying to get this kind of result:
{ "category" : 1201, "score" : 0.5 }
{ "category" : 1201, "score" : 0.5 }
{ "category" : 1201, "score" : 0.4 }
{ "category" : 1202, "score" : 0.6 }
{ "category" : 1202, "score" : 0.5 }
{ "category" : 1202, "score" : 0.4 }
{ "category" : 1203, "score" : 0.8 }
{ "category" : 1203, "score" : 0.7 }
{ "category" : 1203, "score" : 0.4 }
{ "category" : 1204, "score" : 0.9 }
{ "category" : 1204, "score" : 0.8 }
{ "category" : 1204, "score" : 0.7 }
But I really don't know how to do it.
I found a workaround using a map-reduce function, but it's really, really slow.
This is what I did:
// Map step: reads `category` and `score` from `this` (the document being
// processed by mapReduce) and emits the score under its category key.
var map = function()
{
    var category = this.category;
    var score = this.score;
    emit(category, score);
};
/**
 * Reduce step: keeps the three highest scores emitted for one category.
 *
 * MongoDB may call reduce several times for the same key and pass the result
 * of an earlier call back in as one of the values, so each value is accepted
 * in any of these forms: a plain score, an array of scores, or a previous
 * result object of the form {scores: [...]}.
 *
 * @param key    the category the values were emitted under (unused here)
 * @param values array of scores and/or earlier reduce results for that key
 * @returns {{scores: Array}} at most three scores, highest first
 */
var reduce = function(key, values)
{
    var total = [];
    values.forEach(function(value)
    {
        if (value instanceof Array)
            total = total.concat(value);
        else if (value !== null && typeof value === "object" && value.scores instanceof Array)
            total = total.concat(value.scores);
        else
            total.push(value);
    });
    // A numeric comparator is required: the default sort compares the
    // elements as strings, which misorders numbers such as 9 and 10.
    total.sort(function(a, b) { return b - a; });
    return {scores: total.slice(0, 3)};
};
// Run the map-reduce job with the `map` and `reduce` functions and write the result to the "myoutput" collection.
db.mycollection.mapReduce(map, reduce, { out : "myoutput" } );
// Query the output collection.
db.myoutput.find();
// Drop the output collection afterwards.
db.myoutput.drop();
The result is:
{ "_id" : 1201, "value" : { "scores" : [ 0.5, 0.5, 0.4 ] } }
{ "_id" : 1202, "value" : { "scores" : [ 0.6, 0.5, 0.4 ] } }
{ "_id" : 1203, "value" : { "scores" : [ 0.8, 0.7, 0.4 ] } }
{ "_id" : 1204, "value" : { "scores" : [ 0.9, 0.8, 0.7 ] } }
It's not exactly what I wanted, but it does the job.
My question is: Is it possible to do that without using map-reduce? (Or with good performance?)
PS: Excuse my poor English. I'm not fluent.
EDIT:
I finally came up with this solution:
// Map step: reads `category` and `score` from `this` (the document being
// processed by mapReduce) and emits the score under its category key.
var map = function()
{
    var category = this.category;
    var score = this.score;
    emit(category, score);
};
/**
 * Reduce step: keeps the three highest scores emitted for one category.
 *
 * Each value may be a plain score, an array of scores, or an object with a
 * `scores` property (the shape this function itself returns), so the output
 * of one call can be passed back in when MongoDB re-reduces the same key.
 *
 * @param key    the category the values were emitted under (unused here)
 * @param values array of scores and/or earlier reduce results for that key
 * @returns {{scores: Array}} at most three scores, highest first
 */
var reduce = function(key, values)
{
    var total = [];
    values.forEach(function(value)
    {
        // concat() does not modify `total`; it returns a new array, so the
        // result has to be assigned back or the merged scores are lost.
        if (value instanceof Array)
            total = total.concat(value);
        else if (value instanceof Object)
        {
            if (value.scores instanceof Array)
                total = total.concat(value.scores);
            else
                total.push(value.scores);
        }
        else
            total.push(value);
    });
    // Sort numerically, highest score first.
    total.sort(function(a, b) { return b - a; });
    return {scores: total.slice(0, 3)};
};
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(1)
您可以很容易地获得给定类别的结果,例如:
db.mycollection.find({ "category" : 1201 }).sort({ "score" : -1 }).limit(3);
这将为给定类别提供 3 个最佳分数,
然后您可以对类别进行循环,但这将需要大量请求(每个类别一个)。
map-reduce 解决方案是执行此操作的唯一方法,并且您似乎已经有了一个可行的解决方案。
如果你想提高性能,请研究reduce函数,特别是以下部分,这不是很好:
You can very easily get your result for a given category, for example:
db.mycollection.find({ "category" : 1201 }).sort({ "score" : -1 }).limit(3);
This will give the 3 best scores for a given category.
You can then loop over the categories, but this will require a lot of requests (one per category).
The map reduce solution is the only way to do this and you seem to have a working solution.
If you want to improve your performance, work on the reduce function, especially the following part, which is not very good: