1.5 Reading store_test.cc

// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
* Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
*
* This is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software
* Foundation. See file COPYING.
*
*/
using namespace std::placeholders;
typedef boost::mt11213b gen_type;
const uint64_t DEF_STORE_TEST_BLOCKDEV_SIZE = 10240000000;
#define dout_context g_ceph_context
#define GTEST_HAS_PARAM_TEST 1
#if GTEST_HAS_PARAM_TEST
// Check whether two bufferlists hold the same content.
// Note this compares byte contents, not whether the underlying buffer
// segments are identical. For example,
// ["a", "a", "a", "b"]
// ["aa", "ab"]
// these two bufferlists are considered equal.
static bool bl_eq(bufferlist& expected, bufferlist& actual)
{
if (expected.contents_equal(actual))
return true;
unsigned first = 0;
if(expected.length() != actual.length()) {
cout << "--- buffer lengths mismatch " << std::hex
<< "expected 0x" << expected.length() << " != actual 0x"
<< actual.length() << std::dec << std::endl;
derr << "--- buffer lengths mismatch " << std::hex
<< "expected 0x" << expected.length() << " != actual 0x"
<< actual.length() << std::dec << dendl;
}
auto len = std::min(expected.length(), actual.length());
while ( first<len && expected[first] == actual[first])
++first;
unsigned last = len;
while (last > 0 && expected[last-1] == actual[last-1])
--last;
if(len > 0) {
cout << "--- buffer mismatch between offset 0x" << std::hex << first
<< " and 0x" << last << ", total 0x" << len << std::dec
<< std::endl;
derr << "--- buffer mismatch between offset 0x" << std::hex << first
<< " and 0x" << last << ", total 0x" << len << std::dec
<< dendl;
cout << "--- expected:\n";
expected.hexdump(cout);
cout << "--- actual:\n";
actual.hexdump(cout);
}
return false;
}
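// Illustration (not part of store_test.cc) of what bl_eq treats as equal:
// the two bufferlists below hold the same bytes split across differently
// sized segments, so contents_equal()/bl_eq report them as equal.
//
//   bufferlist x, y;
//   x.append("a"); x.append("a"); x.append("a"); x.append("b");
//   y.append("aa"); y.append("ab");
//   assert(bl_eq(x, y));   // same bytes, different segmentation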
template <typename T>
int queue_transaction(
T &store,
ObjectStore::CollectionHandle ch,
ObjectStore::Transaction &&t)
{
if (rand() % 2) {
// Here an empty transaction t2 is created and t is appended into it;
// essentially this exercises the case of appending a real transaction onto an empty one.
ObjectStore::Transaction t2;
t2.append(t);
return store->queue_transaction(ch, std::move(t2));
} else {
return store->queue_transaction(ch, std::move(t));
}
}
bool sorted(const vector<ghobject_t> &in)
{
ghobject_t start;
for (vector<ghobject_t>::const_iterator i = in.begin();
i != in.end();
++i) {
if (start > *i) {
cout << start << " should follow " << *i << std::endl;
return false;
}
start = *i;
}
return true;
}
// The test parameter is a const char* naming the backend,
// so the same tests can run against the bluestore, filestore, memstore and kstore backends.
class StoreTest : public StoreTestFixture, public ::testing::WithParamInterface<const char*>
{
public:
// Fetch the test parameter and initialize the fixture.
// ::testing::GetParam() is used here because the StoreTestFixture
// constructor takes the backend type (const char *type) as its argument.
// For background on gtest value-parameterized tests, see:
// https://blog.csdn.net/breaksoftware/article/details/51059583
StoreTest() : StoreTestFixture(GetParam()) {}
// Compression test
void doCompressionTest();
// Synthetic (artificially generated) workload test
void doSyntheticTest(
int num_ops,
uint64_t max_obj, uint64_t max_wr, uint64_t align);
};
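// The concrete parameter values come from an INSTANTIATE_TEST_CASE_P call
// further down in the file. A sketch of its typical form (the exact backend
// list varies with the Ceph version and build options):
//
//   INSTANTIATE_TEST_CASE_P(
//     ObjectStore,
//     StoreTest,
//     ::testing::Values("memstore", "filestore", "bluestore", "kstore"));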
class StoreTestDeferredSetup : public StoreTest
{
void SetUp() override
{
//do nothing
}
protected:
void DeferredSetup()
{
StoreTest::SetUp();
}
public:
};
class StoreTestSpecificAUSize : public StoreTestDeferredSetup
{
public:
// MatrixTest is the type of function to run under each configuration combination
typedef
std::function<void(
uint64_t num_ops,
uint64_t max_obj,
uint64_t max_wr,
uint64_t align)> MatrixTest;
void StartDeferred(size_t min_alloc_size)
{
SetVal(g_conf, "bluestore_min_alloc_size", stringify(min_alloc_size).c_str());
DeferredSetup();
}
private:
// bluestore matrix testing
uint64_t max_write = 40 * 1024;
uint64_t max_size = 400 * 1024;
uint64_t alignment = 0;
uint64_t num_ops = 10000;
protected:
string matrix_get(const char *k)
{
if (string(k) == "max_write") {
return stringify(max_write);
} else if (string(k) == "max_size") {
return stringify(max_size);
} else if (string(k) == "alignment") {
return stringify(alignment);
} else if (string(k) == "num_ops") {
return stringify(num_ops);
} else {
char *buf;
g_conf->get_val(k, &buf, -1);
string v = buf;
free(buf);
return v;
}
}
void matrix_set(const char *k, const char *v)
{
if (string(k) == "max_write") {
max_write = atoll(v);
} else if (string(k) == "max_size") {
max_size = atoll(v);
} else if (string(k) == "alignment") {
alignment = atoll(v);
} else if (string(k) == "num_ops") {
num_ops = atoll(v);
} else {
SetVal(g_conf, k, v);
}
}
// Structurally this function is a DFS: it tries every configuration
// combination in the matrix and then runs fn for each one.
// matrix is a two-dimensional array whose elements are strings;
// this call handles row i only, recursing for the remaining rows.
void do_matrix_choose(const char *matrix[][10],
int i, int pos, int num,
MatrixTest fn)
{
// if this row is non-empty
if (matrix[i][0]) {
// count is the number of value strings in row i
int count;
for (count = 0; matrix[i][count+1]; ++count) ;
// process each value string in row i in turn;
// the first element of row i (index 0, the key) is skipped,
// and the values at indices 1..count are applied one by one
for (int j = 1; matrix[i][j]; ++j) {
// matrix[i][0] is the key
matrix_set(matrix[i][0], matrix[i][j]);
do_matrix_choose(matrix,
i + 1,
pos * count + j - 1,
num * count,
fn);
}
} else {
cout << "---------------------- " << (pos + 1) << " / " << num
<< " ----------------------" << std::endl;
for (unsigned k=0; matrix[k][0]; ++k) {
cout << " " << matrix[k][0] << " = " << matrix_get(matrix[k][0])
<< std::endl;
}
g_ceph_context->_conf->apply_changes(NULL);
fn(num_ops, max_size, max_write, alignment);
}
}
void do_matrix(const char *matrix[][10],
MatrixTest fn)
{
if (strcmp(matrix[0][0], "bluestore_min_alloc_size") == 0) {
int count;
for (count = 0; matrix[0][count+1]; ++count) ;
for (size_t j = 1; matrix[0][j]; ++j) {
if (j > 1) {
// TearDown cleans up the store left over from the previous iteration
TearDown();
}
StartDeferred(strtoll(matrix[0][j], NULL, 10));
do_matrix_choose(matrix, 1, j - 1, count, fn);
}
} else {
StartDeferred(0);
do_matrix_choose(matrix, 0, 0, 1, fn);
}
}
};
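// Illustration (not from this part of the file): what a configuration matrix
// looks like and how do_matrix() is driven. Each row is a key followed by its
// candidate values and a terminating 0; the matrix itself ends with a row whose
// first entry is 0. do_matrix() enumerates the Cartesian product of all values
// (handling bluestore_min_alloc_size specially, because changing it requires
// re-creating the store) and invokes the supplied MatrixTest for every combination.
//
//   const char *matrix[][10] = {
//     { "bluestore_min_alloc_size", "4096", "65536", 0 },
//     { "max_write", "65536", "262144", 0 },
//     { "bluestore_compression_mode", "none", "force", 0 },
//     { 0 },
//   };
//   do_matrix(matrix,
//             std::bind(&StoreTest::doSyntheticTest, this, _1, _2, _3, _4));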
// The first macro argument is the fixture class name, the second is the test name
TEST_P(StoreTest, collect_metadata)
{
// Members of the fixture can be accessed directly here;
// store is the store member inherited from StoreTestFixture.
map<string,string> pm;
store->collect_metadata(&pm);
if (GetParam() == string("filestore")) {
ASSERT_NE(pm.count("filestore_backend"), 0u);
ASSERT_NE(pm.count("filestore_f_type"), 0u);
ASSERT_NE(pm.count("backend_filestore_partition_path"), 0u);
ASSERT_NE(pm.count("backend_filestore_dev_node"), 0u);
}
}
TEST_P(StoreTest, Trivial)
{
}
TEST_P(StoreTest, TrivialRemount)
{
int r = store->umount();
ASSERT_EQ(0, r);
r = store->mount();
ASSERT_EQ(0, r);
}
TEST_P(StoreTest, SimpleRemount)
{
coll_t cid;
// For an in-depth introduction to the various internal object types, see
// http://bean-li.github.io/ceph-object-in-bottom/
// sobject_t is an object with snapshot support added;
// hobject_t is a hashed object, i.e. one that can be mapped to a specific PG;
// ghobject_t additionally carries EC (shard/generation) information.
// Two objects are created here.
ghobject_t hoid(hobject_t(sobject_t("Object 1", CEPH_NOSNAP)));
ghobject_t hoid2(hobject_t(sobject_t("Object 2", CEPH_NOSNAP)));
bufferlist bl;
bl.append("1234512345");
int r;
// Ask the store for a new collection handle here.
auto ch = store->create_new_collection(cid);
{
cerr << "create collection + write" << std::endl;
ObjectStore::Transaction t;
t.create_collection(cid, 0);
t.write(cid, hoid, 0, bl.length(), bl);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
ch.reset();
r = store->umount();
ASSERT_EQ(0, r);
r = store->mount();
ASSERT_EQ(0, r);
ch = store->open_collection(cid);
{
ObjectStore::Transaction t;
t.write(cid, hoid2, 0, bl.length(), bl);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
t.remove(cid, hoid);
t.remove(cid, hoid2);
t.remove_collection(cid);
cerr << "remove collection" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
ch.reset();
r = store->umount();
ASSERT_EQ(0, r);
r = store->mount();
ASSERT_EQ(0, r);
ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
bool exists = store->exists(ch, hoid);
ASSERT_TRUE(!exists);
}
{
ObjectStore::Transaction t;
t.remove_collection(cid);
cerr << "remove collection" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
TEST_P(StoreTest, IORemount)
{
coll_t cid;
bufferlist bl;
bl.append("1234512345");
int r;
auto ch = store->create_new_collection(cid);
{
cerr << "create collection + objects" << std::endl;
ObjectStore::Transaction t;
t.create_collection(cid, 0);
for (int n=1; n<=100; ++n) {
ghobject_t hoid(hobject_t(sobject_t("Object " + stringify(n), CEPH_NOSNAP)));
t.write(cid, hoid, 0, bl.length(), bl);
}
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
// overwrites
{
cout << "overwrites" << std::endl;
for (int n=1; n<=100; ++n) {
ObjectStore::Transaction t;
ghobject_t hoid(hobject_t(sobject_t("Object " + stringify(n), CEPH_NOSNAP)));
t.write(cid, hoid, 1, bl.length(), bl);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
ch.reset();
r = store->umount();
ASSERT_EQ(0, r);
r = store->mount();
ASSERT_EQ(0, r);
{
ObjectStore::Transaction t;
for (int n=1; n<=100; ++n) {
ghobject_t hoid(hobject_t(sobject_t("Object " + stringify(n), CEPH_NOSNAP)));
t.remove(cid, hoid);
}
t.remove_collection(cid);
auto ch = store->open_collection(cid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
TEST_P(StoreTest, UnprintableCharsName)
{
coll_t cid;
string name = "funnychars_";
for (unsigned i = 0; i < 256; ++i) {
name.push_back(i);
}
ghobject_t oid(hobject_t(sobject_t(name, CEPH_NOSNAP)));
int r;
auto ch = store->create_new_collection(cid);
{
cerr << "create collection + object" << std::endl;
ObjectStore::Transaction t;
t.create_collection(cid, 0);
t.touch(cid, oid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
ch.reset();
r = store->umount();
ASSERT_EQ(0, r);
r = store->mount();
ASSERT_EQ(0, r);
{
cout << "removing" << std::endl;
ObjectStore::Transaction t;
t.remove(cid, oid);
t.remove_collection(cid);
auto ch = store->open_collection(cid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
TEST_P(StoreTest, FiemapEmpty)
{
coll_t cid;
int r = 0;
ghobject_t oid(hobject_t(sobject_t("fiemap_object", CEPH_NOSNAP)));
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
t.touch(cid, oid);
t.truncate(cid, oid, 100000);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
bufferlist bl;
store->fiemap(ch, oid, 0, 100000, bl);
map<uint64_t,uint64_t> m, e;
auto p = bl.cbegin();
decode(m, p);
cout << " got " << m << std::endl;
e[0] = 100000;
EXPECT_TRUE(m == e || m.empty());
}
{
ObjectStore::Transaction t;
t.remove(cid, oid);
t.remove_collection(cid);
cerr << "remove collection" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
TEST_P(StoreTest, FiemapHoles)
{
const uint64_t MAX_EXTENTS = 4000;
const uint64_t SKIP_STEP = 65536;
coll_t cid;
int r = 0;
ghobject_t oid(hobject_t(sobject_t("fiemap_object", CEPH_NOSNAP)));
bufferlist bl;
bl.append("foo");
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
t.touch(cid, oid);
for (uint64_t i = 0; i < MAX_EXTENTS; i++)
t.write(cid, oid, SKIP_STEP * i, 3, bl);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
//fiemap test from 0 to SKIP_STEP * (MAX_EXTENTS - 1) + 3
bufferlist bl;
store->fiemap(ch, oid, 0, SKIP_STEP * (MAX_EXTENTS - 1) + 3, bl);
map<uint64_t,uint64_t> m, e;
auto p = bl.cbegin();
decode(m, p);
cout << " got " << m << std::endl;
ASSERT_TRUE(!m.empty());
ASSERT_GE(m[0], 3u);
auto last = m.crbegin();
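// Explanatory note: backends may report the extents differently; either a
// single merged extent covering the whole written range, or exactly one
// extent per written chunk, is accepted by the assertions below.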
if (m.size() == 1) {
ASSERT_EQ(0u, last->first);
} else if (m.size() == MAX_EXTENTS) {
for (uint64_t i = 0; i < MAX_EXTENTS; i++) {
ASSERT_TRUE(m.count(SKIP_STEP * i));
}
}
ASSERT_GT(last->first + last->second, SKIP_STEP * (MAX_EXTENTS - 1));
}
{
// fiemap test from SKIP_STEP to SKIP_STEP * (MAX_EXTENTS - 2) + 3
bufferlist bl;
store->fiemap(ch, oid, SKIP_STEP, SKIP_STEP * (MAX_EXTENTS - 2) + 3, bl);
map<uint64_t,uint64_t> m, e;
auto p = bl.cbegin();
decode(m, p);
cout << " got " << m << std::endl;
ASSERT_TRUE(!m.empty());
// kstore always returns [0, object_size] regardless of offset and length
// FIXME: if fiemap logic in kstore is refined
if (string(GetParam()) != "kstore") {
ASSERT_GE(m[SKIP_STEP], 3u);
auto last = m.crbegin();
if (m.size() == 1) {
ASSERT_EQ(SKIP_STEP, last->first);
} else if (m.size() == MAX_EXTENTS - 2) {
for (uint64_t i = 1; i < MAX_EXTENTS - 1; i++) {
ASSERT_TRUE(m.count(SKIP_STEP*i));
}
}
ASSERT_GT(last->first + last->second, SKIP_STEP * (MAX_EXTENTS - 1));
}
}
{
ObjectStore::Transaction t;
t.remove(cid, oid);
t.remove_collection(cid);
cerr << "remove collection" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
TEST_P(StoreTest, SimpleMetaColTest)
{
coll_t cid;
int r = 0;
{
auto ch = store->create_new_collection(cid);
ObjectStore::Transaction t;
t.create_collection(cid, 0);
cerr << "create collection" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
t.remove_collection(cid);
cerr << "remove collection" << std::endl;
auto ch = store->open_collection(cid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
auto ch = store->create_new_collection(cid);
ObjectStore::Transaction t;
t.create_collection(cid, 0);
cerr << "add collection" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
t.remove_collection(cid);
cerr << "remove collection" << std::endl;
auto ch = store->open_collection(cid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
TEST_P(StoreTest, SimplePGColTest)
{
coll_t cid(spg_t(pg_t(1,2), shard_id_t::NO_SHARD));
int r = 0;
{
ObjectStore::Transaction t;
auto ch = store->create_new_collection(cid);
t.create_collection(cid, 4);
cerr << "create collection" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
t.remove_collection(cid);
cerr << "remove collection" << std::endl;
auto ch = store->open_collection(cid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
t.create_collection(cid, 4);
cerr << "add collection" << std::endl;
auto ch = store->create_new_collection(cid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
t.remove_collection(cid);
cerr << "remove collection" << std::endl;
auto ch = store->open_collection(cid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
TEST_P(StoreTest, SimpleColPreHashTest)
{
// Firstly we will need to revert the value making sure
// collection hint actually works
int merge_threshold = g_ceph_context->_conf->filestore_merge_threshold;
std::ostringstream oss;
if (merge_threshold > 0) {
oss << "-" << merge_threshold;
SetVal(g_conf, "filestore_merge_threshold", oss.str().c_str());
}
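// Note (not part of store_test.cc): a negative filestore_merge_threshold
// disables directory merging in FileStore while its absolute value still feeds
// the split threshold below, so the directories pre-created via the collection
// hint are not merged back during the test.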
uint32_t pg_num = 128;
boost::uniform_int<> pg_id_range(0, pg_num);
gen_type rng(time(NULL));
int pg_id = pg_id_range(rng);
int objs_per_folder = abs(merge_threshold) * 16 * g_ceph_context->_conf->filestore_split_multiple;
boost::uniform_int<> folders_range(5, 256);
uint64_t expected_num_objs = (uint64_t)objs_per_folder * (uint64_t)folders_range(rng);
coll_t cid(spg_t(pg_t(pg_id, 15), shard_id_t::NO_SHARD));
int r;
auto ch = store->create_new_collection(cid);
{
// Create a collection along with a hint
ObjectStore::Transaction t;
t.create_collection(cid, 5);
cerr << "create collection" << std::endl;
bufferlist hint;
encode(pg_num, hint);
encode(expected_num_objs, hint);
t.collection_hint(cid, ObjectStore::Transaction::COLL_HINT_EXPECTED_NUM_OBJECTS, hint);
cerr << "collection hint" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
// Remove the collection
ObjectStore::Transaction t;
t.remove_collection(cid);
cerr << "remove collection" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
TEST_P(StoreTest, SmallBlockWrites)
{
int r;
coll_t cid;
auto ch = store->create_new_collection(cid);
ghobject_t hoid(hobject_t(sobject_t("foo", CEPH_NOSNAP)));
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
cerr << "Creating collection " << cid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
bufferlist a;
bufferptr ap(0x1000);
memset(ap.c_str(), 'a', 0x1000);
a.append(ap);
bufferlist b;
bufferptr bp(0x1000);
memset(bp.c_str(), 'b', 0x1000);
b.append(bp);
bufferlist c;
bufferptr cp(0x1000);
memset(cp.c_str(), 'c', 0x1000);
c.append(cp);
bufferptr zp(0x1000);
zp.zero();
bufferlist z;
z.append(zp);
{
ObjectStore::Transaction t;
t.write(cid, hoid, 0, 0x1000, a);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
bufferlist in, exp;
r = store->read(ch, hoid, 0, 0x4000, in);
ASSERT_EQ(0x1000, r);
exp.append(a);
ASSERT_TRUE(bl_eq(exp, in));
}
{
ObjectStore::Transaction t;
t.write(cid, hoid, 0x1000, 0x1000, b);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
bufferlist in, exp;
r = store->read(ch, hoid, 0, 0x4000, in);
ASSERT_EQ(0x2000, r);
exp.append(a);
exp.append(b);
ASSERT_TRUE(bl_eq(exp, in));
}
{
ObjectStore::Transaction t;
t.write(cid, hoid, 0x3000, 0x1000, c);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
bufferlist in, exp;
r = store->read(ch, hoid, 0, 0x4000, in);
ASSERT_EQ(0x4000, r);
exp.append(a);
exp.append(b);
exp.append(z);
exp.append(c);
ASSERT_TRUE(bl_eq(exp, in));
}
{
ObjectStore::Transaction t;
t.write(cid, hoid, 0x2000, 0x1000, a);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
bufferlist in, exp;
r = store->read(ch, hoid, 0, 0x4000, in);
ASSERT_EQ(0x4000, r);
exp.append(a);
exp.append(b);
exp.append(a);
exp.append(c);
ASSERT_TRUE(bl_eq(exp, in));
}
{
ObjectStore::Transaction t;
t.write(cid, hoid, 0, 0x1000, c);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
bufferlist in, exp;
r = store->read(ch, hoid, 0, 0x4000, in);
ASSERT_EQ(0x4000, r);
exp.append(c);
exp.append(b);
exp.append(a);
exp.append(c);
ASSERT_TRUE(bl_eq(exp, in));
}
{
ObjectStore::Transaction t;
t.remove(cid, hoid);
t.remove_collection(cid);
cerr << "Cleaning" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
TEST_P(StoreTest, BufferCacheReadTest)
{
int r;
coll_t cid;
ghobject_t hoid(hobject_t(sobject_t("Object 1", CEPH_NOSNAP)));
{
auto ch = store->open_collection(cid);
ASSERT_FALSE(ch);
}
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
cerr << "Creating collection " << cid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
bool exists = store->exists(ch, hoid);
ASSERT_TRUE(!exists);
ObjectStore::Transaction t;
t.touch(cid, hoid);
cerr << "Creating object " << hoid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
exists = store->exists(ch, hoid);
ASSERT_EQ(true, exists);
}
{
ObjectStore::Transaction t;
bufferlist bl, newdata;
bl.append("abcde");
t.write(cid, hoid, 0, 5, bl);
t.write(cid, hoid, 10, 5, bl);
cerr << "TwinWrite" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
r = store->read(ch, hoid, 0, 15, newdata);
ASSERT_EQ(r, 15);
{
bufferlist expected;
expected.append(bl);
expected.append_zero(5);
expected.append(bl);
ASSERT_TRUE(bl_eq(expected, newdata));
}
}
//overwrite over the same extents
{
ObjectStore::Transaction t;
bufferlist bl, newdata;
bl.append("edcba");
t.write(cid, hoid, 0, 5, bl);
t.write(cid, hoid, 10, 5, bl);
cerr << "TwinWrite" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
r = store->read(ch, hoid, 0, 15, newdata);
ASSERT_EQ(r, 15);
{
bufferlist expected;
expected.append(bl);
expected.append_zero(5);
expected.append(bl);
ASSERT_TRUE(bl_eq(expected, newdata));
}
}
//additional write to an unused region of some blob
{
ObjectStore::Transaction t;
bufferlist bl2, newdata;
bl2.append("1234567890");
t.write(cid, hoid, 20, bl2.length(), bl2);
cerr << "Append" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
r = store->read(ch, hoid, 0, 30, newdata);
ASSERT_EQ(r, 30);
{
bufferlist expected;
expected.append("edcba");
expected.append_zero(5);
expected.append("edcba");
expected.append_zero(5);
expected.append(bl2);
ASSERT_TRUE(bl_eq(expected, newdata));
}
}
//additional write to an unused region of some blob and partial overwrite over existing extents
{
ObjectStore::Transaction t;
bufferlist bl, bl2, bl3, newdata;
bl.append("DCB");
bl2.append("1234567890");
bl3.append("BA");
t.write(cid, hoid, 30, bl2.length(), bl2);
t.write(cid, hoid, 1, bl.length(), bl);
t.write(cid, hoid, 13, bl3.length(), bl3);
cerr << "TripleWrite" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
r = store->read(ch, hoid, 0, 40, newdata);
ASSERT_EQ(r, 40);
{
bufferlist expected;
expected.append("eDCBa");
expected.append_zero(5);
expected.append("edcBA");
expected.append_zero(5);
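// bl2 appears twice: bytes 20..29 still hold the "1234567890" written by the
// earlier Append step, and bytes 30..39 hold the bl2 just written at offset 30.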
expected.append(bl2);
expected.append(bl2);
ASSERT_TRUE(bl_eq(expected, newdata));
}
}
}
void StoreTest::doCompressionTest()
{
int r;
coll_t cid;
ghobject_t hoid(hobject_t(sobject_t("Object 1", CEPH_NOSNAP)));
{
auto ch = store->open_collection(cid);
ASSERT_FALSE(ch);
}
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
cerr << "Creating collection " << cid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
bool exists = store->exists(ch, hoid);
ASSERT_TRUE(!exists);
ObjectStore::Transaction t;
t.touch(cid, hoid);
cerr << "Creating object " << hoid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
exists = store->exists(ch, hoid);
ASSERT_EQ(true, exists);
}
std::string data;
data.resize(0x10000 * 4);
for(size_t i = 0; i < data.size(); i++)
data[i] = i / 256;
{
ObjectStore::Transaction t;
bufferlist bl, newdata;
bl.append(data);
t.write(cid, hoid, 0, bl.length(), bl);
cerr << "CompressibleData (4xAU) Write" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
r = store->read(ch, hoid, 0, data.size() , newdata);
ASSERT_EQ(r, (int)data.size());
{
bufferlist expected;
expected.append(data);
ASSERT_TRUE(bl_eq(expected, newdata));
}
newdata.clear();
r = store->read(ch, hoid, 0, 711 , newdata);
ASSERT_EQ(r, 711);
{
bufferlist expected;
expected.append(data.substr(0,711));
ASSERT_TRUE(bl_eq(expected, newdata));
}
newdata.clear();
r = store->read(ch, hoid, 0xf00f, data.size(), newdata);
ASSERT_EQ(r, int(data.size() - 0xf00f) );
{
bufferlist expected;
expected.append(data.substr(0xf00f));
ASSERT_TRUE(bl_eq(expected, newdata));
}
{
struct store_statfs_t statfs;
int r = store->statfs(&statfs);
ASSERT_EQ(r, 0);
ASSERT_EQ(statfs.stored, (unsigned)data.size());
ASSERT_LE(statfs.compressed, (unsigned)data.size());
ASSERT_EQ(statfs.compressed_original, (unsigned)data.size());
ASSERT_LE(statfs.compressed_allocated, (unsigned)data.size());
}
}
std::string data2;
data2.resize(0x10000 * 4 - 0x9000);
for(size_t i = 0; i < data2.size(); i++)
data2[i] = (i+1) / 256;
{
ObjectStore::Transaction t;
bufferlist bl, newdata;
bl.append(data2);
t.write(cid, hoid, 0x8000, bl.length(), bl);
cerr << "CompressibleData partial overwrite" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
r = store->read(ch, hoid, 0, 0x10000, newdata);
ASSERT_EQ(r, (int)0x10000);
{
bufferlist expected;
expected.append(data.substr(0, 0x8000));
expected.append(data2.substr(0, 0x8000));
ASSERT_TRUE(bl_eq(expected, newdata));
}
newdata.clear();
r = store->read(ch, hoid, 0x9000, 711 , newdata);
ASSERT_EQ(r, 711);
{
bufferlist expected;
expected.append(data2.substr(0x1000,711));
ASSERT_TRUE(bl_eq(expected, newdata));
}
newdata.clear();
r = store->read(ch, hoid, 0x0, 0x40000, newdata);
ASSERT_EQ(r, int(0x40000) );
{
bufferlist expected;
expected.append(data.substr(0, 0x8000));
expected.append(data2.substr(0, 0x37000));
expected.append(data.substr(0x3f000, 0x1000));
ASSERT_TRUE(bl_eq(expected, newdata));
}
}
data2.resize(0x3f000);
for(size_t i = 0; i < data2.size(); i++)
data2[i] = (i+2) / 256;
{
ObjectStore::Transaction t;
bufferlist bl, newdata;
bl.append(data2);
t.write(cid, hoid, 0, bl.length(), bl);
cerr << "CompressibleData partial overwrite, two extents overlapped, single one to be removed" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
r = store->read(ch, hoid, 0, 0x3e000 - 1, newdata);
ASSERT_EQ(r, (int)0x3e000 - 1);
{
bufferlist expected;
expected.append(data2.substr(0, 0x3e000 - 1));
ASSERT_TRUE(bl_eq(expected, newdata));
}
newdata.clear();
r = store->read(ch, hoid, 0x3e000-1, 0x2001, newdata);
ASSERT_EQ(r, 0x2001);
{
bufferlist expected;
expected.append(data2.substr(0x3e000-1, 0x1001));
expected.append(data.substr(0x3f000, 0x1000));
ASSERT_TRUE(bl_eq(expected, newdata));
}
newdata.clear();
r = store->read(ch, hoid, 0x0, 0x40000, newdata);
ASSERT_EQ(r, int(0x40000) );
{
bufferlist expected;
expected.append(data2.substr(0, 0x3f000));
expected.append(data.substr(0x3f000, 0x1000));
ASSERT_TRUE(bl_eq(expected, newdata));
}
}
data.resize(0x1001);
for(size_t i = 0; i < data.size(); i++)
data[i] = (i+3) / 256;
{
ObjectStore::Transaction t;
bufferlist bl, newdata;
bl.append(data);
t.write(cid, hoid, 0x3f000-1, bl.length(), bl);
cerr << "Small chunk partial overwrite, two extents overlapped, single one to be removed" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
r = store->read(ch, hoid, 0x3e000, 0x2000, newdata);
ASSERT_EQ(r, (int)0x2000);
{
bufferlist expected;
expected.append(data2.substr(0x3e000, 0x1000 - 1));
expected.append(data.substr(0, 0x1001));
ASSERT_TRUE(bl_eq(expected, newdata));
}
}
{
ObjectStore::Transaction t;
t.remove(cid, hoid);
cerr << "Cleaning object" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
//force fsck
ch.reset();
EXPECT_EQ(store->umount(), 0);
EXPECT_EQ(store->mount(), 0);
ch = store->open_collection(cid);
auto settingsBookmark = BookmarkSettings();
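// BookmarkSettings() (from StoreTestFixture) records the current position of the
// config-override stack; when settingsBookmark goes out of scope the overrides
// applied below are popped again, restoring the previous settings.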
SetVal(g_conf, "bluestore_compression_min_blob_size", "262144");
g_ceph_context->_conf->apply_changes(NULL);
{
data.resize(0x10000*6);
for(size_t i = 0; i < data.size(); i++)
data[i] = i / 256;
ObjectStore::Transaction t;
bufferlist bl, newdata;
bl.append(data);
t.write(cid, hoid, 0, bl.length(), bl);
cerr << "CompressibleData large blob" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
//force fsck
ch.reset();
EXPECT_EQ(store->umount(), 0);
EXPECT_EQ(store->mount(), 0);
ch = store->open_collection(cid);
{
ObjectStore::Transaction t;
t.remove(cid, hoid);
t.remove_collection(cid);
cerr << "Cleaning" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
TEST_P(StoreTest, CompressionTest)
{
if (string(GetParam()) != "bluestore")
return;
SetVal(g_conf, "bluestore_compression_algorithm", "snappy");
SetVal(g_conf, "bluestore_compression_mode", "force");
g_ceph_context->_conf->apply_changes(NULL);
doCompressionTest();
SetVal(g_conf, "bluestore_compression_algorithm", "zlib");
SetVal(g_conf, "bluestore_compression_mode", "aggressive");
g_ceph_context->_conf->apply_changes(NULL);
doCompressionTest();
}
TEST_P(StoreTest, SimpleObjectTest)
{
int r;
coll_t cid;
ghobject_t hoid(hobject_t(sobject_t("Object 1", CEPH_NOSNAP)));
{
auto ch = store->open_collection(cid);
ASSERT_FALSE(ch);
}
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
cerr << "Creating collection " << cid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
bool exists = store->exists(ch, hoid);
ASSERT_TRUE(!exists);
ObjectStore::Transaction t;
t.touch(cid, hoid);
cerr << "Creating object " << hoid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
exists = store->exists(ch, hoid);
ASSERT_EQ(true, exists);
}
{
ObjectStore::Transaction t;
t.remove(cid, hoid);
t.touch(cid, hoid);
cerr << "Remove then create" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
bufferlist bl, orig;
bl.append("abcde");
orig = bl;
t.remove(cid, hoid);
t.write(cid, hoid, 0, 5, bl);
cerr << "Remove then create" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
bufferlist in;
r = store->read(ch, hoid, 0, 5, in);
ASSERT_EQ(5, r);
ASSERT_TRUE(bl_eq(orig, in));
}
{
ObjectStore::Transaction t;
bufferlist bl, exp;
bl.append("abcde");
exp = bl;
exp.append(bl);
t.write(cid, hoid, 5, 5, bl);
cerr << "Append" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
bufferlist in;
r = store->read(ch, hoid, 0, 10, in);
ASSERT_EQ(10, r);
ASSERT_TRUE(bl_eq(exp, in));
}
{
ObjectStore::Transaction t;
bufferlist bl, exp;
bl.append("abcdeabcde");
exp = bl;
t.write(cid, hoid, 0, 10, bl);
cerr << "Full overwrite" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
bufferlist in;
r = store->read(ch, hoid, 0, 10, in);
ASSERT_EQ(10, r);
ASSERT_TRUE(bl_eq(exp, in));
}
{
ObjectStore::Transaction t;
bufferlist bl;
bl.append("abcde");
t.write(cid, hoid, 3, 5, bl);
cerr << "Partial overwrite" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
bufferlist in, exp;
exp.append("abcabcdede");
r = store->read(ch, hoid, 0, 10, in);
ASSERT_EQ(10, r);
in.hexdump(cout);
ASSERT_TRUE(bl_eq(exp, in));
}
{
{
ObjectStore::Transaction t;
bufferlist bl;
bl.append("fghij");
t.truncate(cid, hoid, 0);
t.write(cid, hoid, 5, 5, bl);
cerr << "Truncate + hole" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
bufferlist bl;
bl.append("abcde");
t.write(cid, hoid, 0, 5, bl);
cerr << "Reverse fill-in" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
bufferlist in, exp;
exp.append("abcdefghij");
r = store->read(ch, hoid, 0, 10, in);
ASSERT_EQ(10, r);
in.hexdump(cout);
ASSERT_TRUE(bl_eq(exp, in));
}
{
ObjectStore::Transaction t;
bufferlist bl;
bl.append("abcde01234012340123401234abcde01234012340123401234abcde01234012340123401234abcde01234012340123401234");
t.write(cid, hoid, 0, bl.length(), bl);
cerr << "larger overwrite" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
bufferlist in;
r = store->read(ch, hoid, 0, bl.length(), in);
ASSERT_EQ((int)bl.length(), r);
in.hexdump(cout);
ASSERT_TRUE(bl_eq(bl, in));
}
{
bufferlist bl;
bl.append("abcde01234012340123401234abcde01234012340123401234abcde01234012340123401234abcde01234012340123401234");
//test: offset=len=0 means read all data
bufferlist in;
r = store->read(ch, hoid, 0, 0, in);
ASSERT_EQ((int)bl.length(), r);
in.hexdump(cout);
ASSERT_TRUE(bl_eq(bl, in));
}
{
//verifying unaligned csums
std::string s1("1"), s2(0x1000, '2'), s3("00");
{
ObjectStore::Transaction t;
bufferlist bl;
bl.append(s1);
bl.append(s2);
t.truncate(cid, hoid, 0);
t.write(cid, hoid, 0x1000-1, bl.length(), bl);
cerr << "Write unaligned csum, stage 1" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
bufferlist in, exp1, exp2, exp3;
exp1.append(s1);
exp2.append(s2);
exp3.append(s3);
r = store->read(ch, hoid, 0x1000-1, 1, in);
ASSERT_EQ(1, r);
ASSERT_TRUE(bl_eq(exp1, in));
in.clear();
r = store->read(ch, hoid, 0x1000, 0x1000, in);
ASSERT_EQ(0x1000, r);
ASSERT_TRUE(bl_eq(exp2, in));
{
ObjectStore::Transaction t;
bufferlist bl;
bl.append(s3);
t.write(cid, hoid, 1, bl.length(), bl);
cerr << "Write unaligned csum, stage 2" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
in.clear();
r = store->read(ch, hoid, 1, 2, in);
ASSERT_EQ(2, r);
ASSERT_TRUE(bl_eq(exp3, in));
in.clear();
r = store->read(ch, hoid, 0x1000-1, 1, in);
ASSERT_EQ(1, r);
ASSERT_TRUE(bl_eq(exp1, in));
in.clear();
r = store->read(ch, hoid, 0x1000, 0x1000, in);
ASSERT_EQ(0x1000, r);
ASSERT_TRUE(bl_eq(exp2, in));
}
{
ObjectStore::Transaction t;
t.remove(cid, hoid);
t.remove_collection(cid);
cerr << "Cleaning" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
#if defined(WITH_BLUESTORE)
TEST_P(StoreTestSpecificAUSize, BluestoreStatFSTest)
{
if(string(GetParam()) != "bluestore")
return;
StartDeferred(65536);
SetVal(g_conf, "bluestore_compression_mode", "force");
// just a big number to disable gc
SetVal(g_conf, "bluestore_gc_enable_total_threshold", "100000");
g_conf->apply_changes(NULL);
int r;
coll_t cid;
ghobject_t hoid(hobject_t(sobject_t("Object 1", CEPH_NOSNAP)));
ghobject_t hoid2 = hoid;
hoid2.hobj.snap = 1;
{
auto ch = store->open_collection(cid);
ASSERT_FALSE(ch);
}
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
cerr << "Creating collection " << cid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
bool exists = store->exists(ch, hoid);
ASSERT_TRUE(!exists);
ObjectStore::Transaction t;
t.touch(cid, hoid);
cerr << "Creating object " << hoid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
exists = store->exists(ch, hoid);
ASSERT_EQ(true, exists);
}
{
struct store_statfs_t statfs;
int r = store->statfs(&statfs);
ASSERT_EQ(r, 0);
ASSERT_EQ( 0u, statfs.allocated);
ASSERT_EQ( 0u, statfs.stored);
ASSERT_EQ(g_conf->bluestore_block_size, statfs.total);
ASSERT_TRUE(statfs.available > 0u && statfs.available < g_conf->bluestore_block_size);
//force fsck
ch.reset();
EXPECT_EQ(store->umount(), 0);
EXPECT_EQ(store->mount(), 0);
ch = store->open_collection(cid);
}
{
ObjectStore::Transaction t;
bufferlist bl;
bl.append("abcde");
t.write(cid, hoid, 0, 5, bl);
cerr << "Append 5 bytes" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
struct store_statfs_t statfs;
int r = store->statfs(&statfs);
ASSERT_EQ(r, 0);
ASSERT_EQ(5, statfs.stored);
ASSERT_EQ(0x10000, statfs.allocated);
ASSERT_EQ(0, statfs.compressed);
ASSERT_EQ(0, statfs.compressed_original);
ASSERT_EQ(0, statfs.compressed_allocated);
//force fsck
ch.reset();
EXPECT_EQ(store->umount(), 0);
EXPECT_EQ(store->mount(), 0);
ch = store->open_collection(cid);
}
{
ObjectStore::Transaction t;
std::string s(0x30000, 'a');
bufferlist bl;
bl.append(s);
t.write(cid, hoid, 0x10000, bl.length(), bl);
cerr << "Append 0x30000 compressible bytes" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
struct store_statfs_t statfs;
int r = store->statfs(&statfs);
ASSERT_EQ(r, 0);
ASSERT_EQ(0x30005, statfs.stored);
ASSERT_EQ(0x30000, statfs.allocated);
ASSERT_LE(statfs.compressed, 0x10000);
ASSERT_EQ(0x20000, statfs.compressed_original);
ASSERT_EQ(statfs.compressed_allocated, 0x10000);
//force fsck
ch.reset();
EXPECT_EQ(store->umount(), 0);
EXPECT_EQ(store->mount(), 0);
ch = store->open_collection(cid);
}
{
ObjectStore::Transaction t;
t.zero(cid, hoid, 1, 3);
t.zero(cid, hoid, 0x20000, 9);
cerr << "Punch hole at 1~3, 0x20000~9" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
struct store_statfs_t statfs;
int r = store->statfs(&statfs);
ASSERT_EQ(r, 0);
ASSERT_EQ(0x30005 - 3 - 9, statfs.stored);
ASSERT_EQ(0x30000, statfs.allocated);
ASSERT_LE(statfs.compressed, 0x10000);
ASSERT_EQ(0x20000 - 9, statfs.compressed_original);
ASSERT_EQ(statfs.compressed_allocated, 0x10000);
//force fsck
ch.reset();
EXPECT_EQ(store->umount(), 0);
EXPECT_EQ(store->mount(), 0);
ch = store->open_collection(cid);
}
{
ObjectStore::Transaction t;
std::string s(0x1000, 'b');
bufferlist bl;
bl.append(s);
t.write(cid, hoid, 1, bl.length(), bl);
t.write(cid, hoid, 0x10001, bl.length(), bl);
cerr << "Overwrite first and second(compressible) extents" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
struct store_statfs_t statfs;
int r = store->statfs(&statfs);
ASSERT_EQ(r, 0);
ASSERT_EQ(0x30001 - 9 + 0x1000, statfs.stored);
ASSERT_EQ(0x40000, statfs.allocated);
ASSERT_LE(statfs.compressed, 0x10000);
ASSERT_EQ(0x20000 - 9 - 0x1000, statfs.compressed_original);
ASSERT_EQ(statfs.compressed_allocated, 0x10000);
//force fsck
ch.reset();
EXPECT_EQ(store->umount(), 0);
EXPECT_EQ(store->mount(), 0);
ch = store->open_collection(cid);
}
{
ObjectStore::Transaction t;
std::string s(0x10000, 'c');
bufferlist bl;
bl.append(s);
t.write(cid, hoid, 0x10000, bl.length(), bl);
t.write(cid, hoid, 0x20000, bl.length(), bl);
t.write(cid, hoid, 0x30000, bl.length(), bl);
cerr << "Overwrite compressed extent with 3 uncompressible ones" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
struct store_statfs_t statfs;
int r = store->statfs(&statfs);
ASSERT_EQ(r, 0);
ASSERT_EQ(0x30000 + 0x1001, statfs.stored);
ASSERT_EQ(0x40000, statfs.allocated);
ASSERT_LE(statfs.compressed, 0);
ASSERT_EQ(0, statfs.compressed_original);
ASSERT_EQ(0, statfs.compressed_allocated);
//force fsck
ch.reset();
EXPECT_EQ(store->umount(), 0);
EXPECT_EQ(store->mount(), 0);
ch = store->open_collection(cid);
}
{
ObjectStore::Transaction t;
t.zero(cid, hoid, 0, 0x40000);
cerr << "Zero object" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
struct store_statfs_t statfs;
int r = store->statfs(&statfs);
ASSERT_EQ(r, 0);
ASSERT_EQ(0u, statfs.allocated);
ASSERT_EQ(0u, statfs.stored);
ASSERT_EQ(0u, statfs.compressed_original);
ASSERT_EQ(0u, statfs.compressed);
ASSERT_EQ(0u, statfs.compressed_allocated);
//force fsck
ch.reset();
EXPECT_EQ(store->umount(), 0);
EXPECT_EQ(store->mount(), 0);
ch = store->open_collection(cid);
}
{
ObjectStore::Transaction t;
std::string s(0x10000, 'c');
bufferlist bl;
bl.append(s);
bl.append(s);
bl.append(s);
bl.append(s.substr(0, 0x10000-2));
t.write(cid, hoid, 0, bl.length(), bl);
cerr << "Yet another compressible write" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
struct store_statfs_t statfs;
r = store->statfs(&statfs);
ASSERT_EQ(r, 0);
ASSERT_EQ(0x40000 - 2, statfs.stored);
ASSERT_EQ(0x30000, statfs.allocated);
ASSERT_LE(statfs.compressed, 0x10000);
ASSERT_EQ(0x20000, statfs.compressed_original);
ASSERT_EQ(0x10000, statfs.compressed_allocated);
//force fsck
ch.reset();
EXPECT_EQ(store->umount(), 0);
EXPECT_EQ(store->mount(), 0);
ch = store->open_collection(cid);
}
{
struct store_statfs_t statfs;
r = store->statfs(&statfs);
ASSERT_EQ(r, 0);
ObjectStore::Transaction t;
t.clone(cid, hoid, hoid2);
cerr << "Clone compressed objecte" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
struct store_statfs_t statfs2;
r = store->statfs(&statfs2);
ASSERT_EQ(r, 0);
ASSERT_GT(statfs2.stored, statfs.stored);
ASSERT_EQ(statfs2.allocated, statfs.allocated);
ASSERT_GT(statfs2.compressed, statfs.compressed);
ASSERT_GT(statfs2.compressed_original, statfs.compressed_original);
ASSERT_EQ(statfs2.compressed_allocated, statfs.compressed_allocated);
}
{
ObjectStore::Transaction t;
t.remove(cid, hoid);
t.remove(cid, hoid2);
t.remove_collection(cid);
cerr << "Cleaning" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
struct store_statfs_t statfs;
r = store->statfs(&statfs);
ASSERT_EQ(r, 0);
ASSERT_EQ( 0u, statfs.allocated);
ASSERT_EQ( 0u, statfs.stored);
ASSERT_EQ( 0u, statfs.compressed_original);
ASSERT_EQ( 0u, statfs.compressed);
ASSERT_EQ( 0u, statfs.compressed_allocated);
}
}
TEST_P(StoreTestSpecificAUSize, BluestoreFragmentedBlobTest)
{
if(string(GetParam()) != "bluestore")
return;
StartDeferred(0x10000);
int r;
coll_t cid;
ghobject_t hoid(hobject_t(sobject_t("Object 1", CEPH_NOSNAP)));
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
cerr << "Creating collection " << cid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
bool exists = store->exists(ch, hoid);
ASSERT_TRUE(!exists);
ObjectStore::Transaction t;
t.touch(cid, hoid);
cerr << "Creating object " << hoid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
exists = store->exists(ch, hoid);
ASSERT_EQ(true, exists);
}
{
struct store_statfs_t statfs;
int r = store->statfs(&statfs);
ASSERT_EQ(r, 0);
ASSERT_EQ(g_conf->bluestore_block_size, statfs.total);
ASSERT_EQ(0u, statfs.allocated);
ASSERT_EQ(0u, statfs.stored);
ASSERT_TRUE(statfs.available > 0u && statfs.available < g_conf->bluestore_block_size);
}
std::string data;
data.resize(0x10000 * 3);
{
ObjectStore::Transaction t;
for(size_t i = 0; i < data.size(); i++)
data[i] = i / 256 + 1;
bufferlist bl, newdata;
bl.append(data);
t.write(cid, hoid, 0, bl.length(), bl);
t.zero(cid, hoid, 0x10000, 0x10000);
cerr << "Append 3*0x10000 bytes and punch a hole 0x10000~10000" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
struct store_statfs_t statfs;
int r = store->statfs(&statfs);
ASSERT_EQ(r, 0);
ASSERT_EQ(0x20000, statfs.stored);
ASSERT_EQ(0x20000, statfs.allocated);
r = store->read(ch, hoid, 0, data.size(), newdata);
ASSERT_EQ(r, (int)data.size());
{
bufferlist expected;
expected.append(data.substr(0, 0x10000));
expected.append(string(0x10000, 0));
expected.append(data.substr(0x20000, 0x10000));
ASSERT_TRUE(bl_eq(expected, newdata));
}
newdata.clear();
r = store->read(ch, hoid, 1, data.size()-2, newdata);
ASSERT_EQ(r, (int)data.size()-2);
{
bufferlist expected;
expected.append(data.substr(1, 0x10000-1));
expected.append(string(0x10000, 0));
expected.append(data.substr(0x20000, 0x10000 - 1));
ASSERT_TRUE(bl_eq(expected, newdata));
}
newdata.clear();
}
//force fsck
ch.reset();
EXPECT_EQ(store->umount(), 0);
EXPECT_EQ(store->mount(), 0);
ch = store->open_collection(cid);
{
ObjectStore::Transaction t;
std::string data2(3, 'b');
bufferlist bl, newdata;
bl.append(data2);
t.write(cid, hoid, 0x20000, bl.length(), bl);
cerr << "Write 3 bytes after the hole" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
struct store_statfs_t statfs;
int r = store->statfs(&statfs);
ASSERT_EQ(r, 0);
ASSERT_EQ(0x20000, statfs.allocated);
ASSERT_EQ(0x20000, statfs.stored);
r = store->read(ch, hoid, 0x20000-1, 21, newdata);
ASSERT_EQ(r, (int)21);
{
bufferlist expected;
expected.append(string(0x1, 0));
expected.append(string(data2));
expected.append(data.substr(0x20003, 21-4));
ASSERT_TRUE(bl_eq(expected, newdata));
}
newdata.clear();
}
//force fsck
ch.reset();
EXPECT_EQ(store->umount(), 0);
EXPECT_EQ(store->mount(), 0);
ch = store->open_collection(cid);
{
ObjectStore::Transaction t;
std::string data2(3, 'a');
bufferlist bl, newdata;
bl.append(data2);
t.write(cid, hoid, 0x10000+1, bl.length(), bl);
cerr << "Write 3 bytes to the hole" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
struct store_statfs_t statfs;
int r = store->statfs(&statfs);
ASSERT_EQ(r, 0);
ASSERT_EQ(0x30000, statfs.allocated);
ASSERT_EQ(0x20003, statfs.stored);
r = store->read(ch, hoid, 0x10000-1, 0x10000+22, newdata);
ASSERT_EQ(r, (int)0x10000+22);
{
bufferlist expected;
expected.append(data.substr(0x10000-1, 1));
expected.append(string(0x1, 0));
expected.append(data2);
expected.append(string(0x10000-4, 0));
expected.append(string(0x3, 'b'));
expected.append(data.substr(0x20004, 21-3));
ASSERT_TRUE(bl_eq(expected, newdata));
}
newdata.clear();
}
{
ObjectStore::Transaction t;
bufferlist bl, newdata;
bl.append(string(0x30000, 'c'));
t.write(cid, hoid, 0, 0x30000, bl);
t.zero(cid, hoid, 0, 0x10000);
t.zero(cid, hoid, 0x20000, 0x10000);
cerr << "Rewrite an object and create two holes at the beginning and the end" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
struct store_statfs_t statfs;
int r = store->statfs(&statfs);
ASSERT_EQ(r, 0);
ASSERT_EQ(0x10000, statfs.allocated);
ASSERT_EQ(0x10000, statfs.stored);
r = store->read(ch, hoid, 0, 0x30000, newdata);
ASSERT_EQ(r, (int)0x30000);
{
bufferlist expected;
expected.append(string(0x10000, 0));
expected.append(string(0x10000, 'c'));
expected.append(string(0x10000, 0));
ASSERT_TRUE(bl_eq(expected, newdata));
}
newdata.clear();
}
//force fsck
ch.reset();
EXPECT_EQ(store->umount(), 0);
EXPECT_EQ(store->mount(), 0);
ch = store->open_collection(cid);
{
ObjectStore::Transaction t;
t.remove(cid, hoid);
t.remove_collection(cid);
cerr << "Cleaning" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
struct store_statfs_t statfs;
r = store->statfs(&statfs);
ASSERT_EQ(r, 0);
ASSERT_EQ( 0u, statfs.allocated);
ASSERT_EQ( 0u, statfs.stored);
ASSERT_EQ( 0u, statfs.compressed_original);
ASSERT_EQ( 0u, statfs.compressed);
ASSERT_EQ( 0u, statfs.compressed_allocated);
}
}
#endif
TEST_P(StoreTest, ManySmallWrite)
{
int r;
coll_t cid;
ghobject_t a(hobject_t(sobject_t("Object 1", CEPH_NOSNAP)));
ghobject_t b(hobject_t(sobject_t("Object 2", CEPH_NOSNAP)));
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
cerr << "Creating collection " << cid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
bufferlist bl;
bufferptr bp(4096);
bp.zero();
bl.append(bp);
for (int i=0; i<100; ++i) {
ObjectStore::Transaction t;
t.write(cid, a, i*4096, 4096, bl, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
for (int i=0; i<100; ++i) {
ObjectStore::Transaction t;
t.write(cid, b, (rand() % 1024)*4096, 4096, bl, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
t.remove(cid, a);
t.remove(cid, b);
t.remove_collection(cid);
cerr << "Cleaning" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
TEST_P(StoreTest, MultiSmallWriteSameBlock)
{
int r;
coll_t cid;
ghobject_t a(hobject_t(sobject_t("Object 1", CEPH_NOSNAP)));
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
cerr << "Creating collection " << cid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
bufferlist bl;
bl.append("short");
C_SaferCond c, d;
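// C_SaferCond is a Context carrying its own mutex/condition variable; registering
// it via register_on_commit() lets the test block in wait() until the
// corresponding transaction has committed.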
// touch same block in both same transaction, tls, and pipelined txns
{
ObjectStore::Transaction t, u;
t.write(cid, a, 0, 5, bl, 0);
t.write(cid, a, 5, 5, bl, 0);
t.write(cid, a, 4094, 5, bl, 0);
t.write(cid, a, 9000, 5, bl, 0);
u.write(cid, a, 10, 5, bl, 0);
u.write(cid, a, 7000, 5, bl, 0);
t.register_on_commit(&c);
vector<ObjectStore::Transaction> v = {t, u};
store->queue_transactions(ch, v);
}
{
ObjectStore::Transaction t, u;
t.write(cid, a, 40, 5, bl, 0);
t.write(cid, a, 45, 5, bl, 0);
t.write(cid, a, 4094, 5, bl, 0);
t.write(cid, a, 6000, 5, bl, 0);
u.write(cid, a, 610, 5, bl, 0);
u.write(cid, a, 11000, 5, bl, 0);
t.register_on_commit(&d);
vector<ObjectStore::Transaction> v = {t, u};
store->queue_transactions(ch, v);
}
c.wait();
d.wait();
{
bufferlist bl2;
r = store->read(ch, a, 0, 16000, bl2);
ASSERT_GE(r, 0);
}
{
ObjectStore::Transaction t;
t.remove(cid, a);
t.remove_collection(cid);
cerr << "Cleaning" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
TEST_P(StoreTest, SmallSkipFront)
{
int r;
coll_t cid;
ghobject_t a(hobject_t(sobject_t("Object 1", CEPH_NOSNAP)));
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
cerr << "Creating collection " << cid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
t.touch(cid, a);
t.truncate(cid, a, 3000);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
bufferlist bl;
bufferptr bp(4096);
memset(bp.c_str(), 1, 4096);
bl.append(bp);
ObjectStore::Transaction t;
t.write(cid, a, 4096, 4096, bl);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
bufferlist bl;
ASSERT_EQ(8192, store->read(ch, a, 0, 8192, bl));
for (unsigned i=0; i<4096; ++i)
ASSERT_EQ(0, bl[i]);
for (unsigned i=4096; i<8192; ++i)
ASSERT_EQ(1, bl[i]);
}
{
ObjectStore::Transaction t;
t.remove(cid, a);
t.remove_collection(cid);
cerr << "Cleaning" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
TEST_P(StoreTest, AppendDeferredVsTailCache)
{
int r;
coll_t cid;
ghobject_t a(hobject_t(sobject_t("fooo", CEPH_NOSNAP)));
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
cerr << "Creating collection " << cid << std::endl;
r = store->queue_transaction(ch, std::move(t));
ASSERT_EQ(r, 0);
}
unsigned min_alloc = g_conf->bluestore_min_alloc_size;
unsigned size = min_alloc / 3;
bufferptr bpa(size);
memset(bpa.c_str(), 1, bpa.length());
bufferlist bla;
bla.append(bpa);
{
ObjectStore::Transaction t;
t.write(cid, a, 0, bla.length(), bla, 0);
r = store->queue_transaction(ch, std::move(t));
ASSERT_EQ(r, 0);
}
// force cached tail to clear ...
{
ch.reset();
int r = store->umount();
ASSERT_EQ(0, r);
r = store->mount();
ASSERT_EQ(0, r);
ch = store->open_collection(cid);
}
bufferptr bpb(size);
memset(bpb.c_str(), 2, bpb.length());
bufferlist blb;
blb.append(bpb);
{
ObjectStore::Transaction t;
t.write(cid, a, bla.length(), blb.length(), blb, 0);
r = store->queue_transaction(ch, std::move(t));
ASSERT_EQ(r, 0);
}
bufferptr bpc(size);
memset(bpc.c_str(), 3, bpc.length());
bufferlist blc;
blc.append(bpc);
{
ObjectStore::Transaction t;
t.write(cid, a, bla.length() + blb.length(), blc.length(), blc, 0);
r = store->queue_transaction(ch, std::move(t));
ASSERT_EQ(r, 0);
}
bufferlist final;
final.append(bla);
final.append(blb);
final.append(blc);
bufferlist actual;
{
ASSERT_EQ((int)final.length(),
store->read(ch, a, 0, final.length(), actual));
ASSERT_TRUE(bl_eq(final, actual));
}
{
ObjectStore::Transaction t;
t.remove(cid, a);
t.remove_collection(cid);
cerr << "Cleaning" << std::endl;
r = store->queue_transaction(ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
TEST_P(StoreTest, AppendZeroTrailingSharedBlock)
{
int r;
coll_t cid;
ghobject_t a(hobject_t(sobject_t("fooo", CEPH_NOSNAP)));
ghobject_t b = a;
b.hobj.snap = 1;
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
cerr << "Creating collection " << cid << std::endl;
r = store->queue_transaction(ch, std::move(t));
ASSERT_EQ(r, 0);
}
unsigned min_alloc = g_conf->bluestore_min_alloc_size;
unsigned size = min_alloc / 3;
bufferptr bpa(size);
memset(bpa.c_str(), 1, bpa.length());
bufferlist bla;
bla.append(bpa);
// make sure there is some trailing gunk in the last block
{
bufferlist bt;
bt.append(bla);
bt.append("BADBADBADBAD");
ObjectStore::Transaction t;
t.write(cid, a, 0, bt.length(), bt, 0);
r = store->queue_transaction(ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
t.truncate(cid, a, size);
r = store->queue_transaction(ch, std::move(t));
ASSERT_EQ(r, 0);
}
// clone
{
ObjectStore::Transaction t;
t.clone(cid, a, b);
r = store->queue_transaction(ch, std::move(t));
ASSERT_EQ(r, 0);
}
// append with implicit zeroing
bufferptr bpb(size);
memset(bpb.c_str(), 2, bpb.length());
bufferlist blb;
blb.append(bpb);
{
ObjectStore::Transaction t;
t.write(cid, a, min_alloc * 3, blb.length(), blb, 0);
r = store->queue_transaction(ch, std::move(t));
ASSERT_EQ(r, 0);
}
bufferlist final;
final.append(bla);
bufferlist zeros;
zeros.append_zero(min_alloc * 3 - size);
final.append(zeros);
final.append(blb);
bufferlist actual;
{
ASSERT_EQ((int)final.length(),
store->read(ch, a, 0, final.length(), actual));
final.hexdump(cout);
actual.hexdump(cout);
ASSERT_TRUE(bl_eq(final, actual));
}
{
ObjectStore::Transaction t;
t.remove(cid, a);
t.remove(cid, b);
t.remove_collection(cid);
cerr << "Cleaning" << std::endl;
r = store->queue_transaction(ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
TEST_P(StoreTest, SmallSequentialUnaligned)
{
int r;
coll_t cid;
ghobject_t a(hobject_t(sobject_t("Object 1", CEPH_NOSNAP)));
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
cerr << "Creating collection " << cid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
bufferlist bl;
int len = 1000;
bufferptr bp(len);
bp.zero();
bl.append(bp);
for (int i=0; i<1000; ++i) {
ObjectStore::Transaction t;
t.write(cid, a, i*len, len, bl, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
t.remove(cid, a);
t.remove_collection(cid);
cerr << "Cleaning" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
TEST_P(StoreTest, ManyBigWrite)
{
int r;
coll_t cid;
ghobject_t a(hobject_t(sobject_t("Object 1", CEPH_NOSNAP)));
ghobject_t b(hobject_t(sobject_t("Object 2", CEPH_NOSNAP)));
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
cerr << "Creating collection " << cid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
bufferlist bl;
bufferptr bp(4 * 1048576);
bp.zero();
bl.append(bp);
for (int i=0; i<10; ++i) {
ObjectStore::Transaction t;
t.write(cid, a, i*4*1048586, 4*1048576, bl, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
// aligned
for (int i=0; i<10; ++i) {
ObjectStore::Transaction t;
t.write(cid, b, (rand() % 256)*4*1048576, 4*1048576, bl, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
// unaligned
for (int i=0; i<10; ++i) {
ObjectStore::Transaction t;
t.write(cid, b, (rand() % (256*4096))*1024, 4*1048576, bl, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
// do some zeros
for (int i=0; i<10; ++i) {
ObjectStore::Transaction t;
t.zero(cid, b, (rand() % (256*4096))*1024, 16*1048576);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
t.remove(cid, a);
t.remove(cid, b);
t.remove_collection(cid);
cerr << "Cleaning" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
TEST_P(StoreTest, BigWriteBigZero)
{
int r;
coll_t cid;
ghobject_t a(hobject_t(sobject_t("foo", CEPH_NOSNAP)));
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
bufferlist bl;
bufferptr bp(1048576);
memset(bp.c_str(), 'b', bp.length());
bl.append(bp);
bufferlist s;
bufferptr sp(4096);
memset(sp.c_str(), 's', sp.length());
s.append(sp);
{
ObjectStore::Transaction t;
t.write(cid, a, 0, bl.length(), bl);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
t.zero(cid, a, bl.length() / 4, bl.length() / 2);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
t.write(cid, a, bl.length() / 2, s.length(), s);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
t.remove(cid, a);
t.remove_collection(cid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
TEST_P(StoreTest, MiscFragmentTests)
{
int r;
coll_t cid;
ghobject_t a(hobject_t(sobject_t("Object 1", CEPH_NOSNAP)));
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
cerr << "Creating collection " << cid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
bufferlist bl;
bufferptr bp(524288);
bp.zero();
bl.append(bp);
{
ObjectStore::Transaction t;
t.write(cid, a, 0, 524288, bl, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
t.write(cid, a, 1048576, 524288, bl, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
bufferlist inbl;
int r = store->read(ch, a, 524288 + 131072, 1024, inbl);
ASSERT_EQ(r, 1024);
ASSERT_EQ(inbl.length(), 1024u);
ASSERT_TRUE(inbl.is_zero());
}
{
ObjectStore::Transaction t;
t.write(cid, a, 1048576 - 4096, 524288, bl, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
t.remove(cid, a);
t.remove_collection(cid);
cerr << "Cleaning" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
TEST_P(StoreTest, ZeroVsObjectSize)
{
int r;
coll_t cid;
struct stat stat;
ghobject_t hoid(hobject_t(sobject_t("foo", CEPH_NOSNAP)));
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
cerr << "Creating collection " << cid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
bufferlist a;
a.append("stuff");
{
ObjectStore::Transaction t;
t.write(cid, hoid, 0, 5, a);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
ASSERT_EQ(0, store->stat(ch, hoid, &stat));
ASSERT_EQ(5, stat.st_size);
{
ObjectStore::Transaction t;
t.zero(cid, hoid, 1, 2);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
ASSERT_EQ(0, store->stat(ch, hoid, &stat));
ASSERT_EQ(5, stat.st_size);
{
ObjectStore::Transaction t;
t.zero(cid, hoid, 3, 200);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
ASSERT_EQ(0, store->stat(ch, hoid, &stat));
ASSERT_EQ(203, stat.st_size);
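// Zeroing a range that extends past the current end of the object grows it,
// so after the next transaction st_size becomes offset + length (100000 + 200).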
{
ObjectStore::Transaction t;
t.zero(cid, hoid, 100000, 200);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
ASSERT_EQ(0, store->stat(ch, hoid, &stat));
ASSERT_EQ(100200, stat.st_size);
}
TEST_P(StoreTest, ZeroLengthWrite)
{
int r;
coll_t cid;
ghobject_t hoid(hobject_t(sobject_t("foo", CEPH_NOSNAP)));
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
t.touch(cid, hoid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
bufferlist empty;
t.write(cid, hoid, 1048576, 0, empty);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
struct stat stat;
r = store->stat(ch, hoid, &stat);
ASSERT_EQ(0, r);
ASSERT_EQ(0, stat.st_size);
bufferlist newdata;
r = store->read(ch, hoid, 0, 1048576, newdata);
ASSERT_EQ(0, r);
}
TEST_P(StoreTest, ZeroLengthZero)
{
int r;
coll_t cid;
ghobject_t hoid(hobject_t(sobject_t("foo", CEPH_NOSNAP)));
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
t.touch(cid, hoid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(0, r);
}
{
ObjectStore::Transaction t;
t.zero(cid, hoid, 1048576, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(0, r);
}
struct stat stat;
r = store->stat(ch, hoid, &stat);
ASSERT_EQ(0, r);
ASSERT_EQ(0, stat.st_size);
bufferlist newdata;
r = store->read(ch, hoid, 0, 1048576, newdata);
ASSERT_EQ(0, r);
}
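// SimpleAttrTest: basic xattr coverage -- open_collection on a missing collection
// fails, getattr on a missing object returns -ENOENT and on a missing attr -ENODATA,
// and attrs set via setattr round-trip through getattr/getattrs.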
TEST_P(StoreTest, SimpleAttrTest)
{
int r;
coll_t cid;
ghobject_t hoid(hobject_t(sobject_t("attr object 1", CEPH_NOSNAP)));
bufferlist val, val2;
val.append("value");
val.append("value2");
{
auto ch = store->open_collection(cid);
ASSERT_FALSE(ch);
}
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
bool empty;
int r = store->collection_empty(ch, &empty);
ASSERT_EQ(0, r);
ASSERT_TRUE(empty);
}
{
bufferptr bp;
r = store->getattr(ch, hoid, "nofoo", bp);
ASSERT_EQ(-ENOENT, r);
}
{
ObjectStore::Transaction t;
t.touch(cid, hoid);
t.setattr(cid, hoid, "foo", val);
t.setattr(cid, hoid, "bar", val2);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
bool empty;
int r = store->collection_empty(ch, &empty);
ASSERT_EQ(0, r);
ASSERT_TRUE(!empty);
}
{
bufferptr bp;
r = store->getattr(ch, hoid, "nofoo", bp);
ASSERT_EQ(-ENODATA, r);
r = store->getattr(ch, hoid, "foo", bp);
ASSERT_EQ(0, r);
bufferlist bl;
bl.append(bp);
ASSERT_TRUE(bl_eq(val, bl));
map<string,bufferptr> bm;
r = store->getattrs(ch, hoid, bm);
ASSERT_EQ(0, r);
}
{
ObjectStore::Transaction t;
t.remove(cid, hoid);
t.remove_collection(cid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
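// SimpleListTest: create 200 objects in a sharded collection and page through them
// with collection_list() in batches of 50, checking that each batch is sorted and
// that the union of all batches matches exactly what was created.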
TEST_P(StoreTest, SimpleListTest)
{
int r;
coll_t cid(spg_t(pg_t(0, 1), shard_id_t(1)));
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
cerr << "Creating collection " << cid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
set<ghobject_t> all;
{
ObjectStore::Transaction t;
for (int i=0; i<200; ++i) {
string name("object_");
name += stringify(i);
ghobject_t hoid(hobject_t(sobject_t(name, CEPH_NOSNAP)),
ghobject_t::NO_GEN, shard_id_t(1));
hoid.hobj.pool = 1;
all.insert(hoid);
t.touch(cid, hoid);
cerr << "Creating object " << hoid << std::endl;
}
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
set<ghobject_t> saw;
vector<ghobject_t> objects;
ghobject_t next, current;
while (!next.is_max()) {
int r = store->collection_list(ch, current, ghobject_t::get_max(),
50,
&objects, &next);
ASSERT_EQ(r, 0);
ASSERT_TRUE(sorted(objects));
cout << " got " << objects.size() << " next " << next << std::endl;
for (vector<ghobject_t>::iterator p = objects.begin(); p != objects.end();
++p) {
if (saw.count(*p)) {
cout << "got DUP " << *p << std::endl;
} else {
//cout << "got new " << *p << std::endl;
}
saw.insert(*p);
}
objects.clear();
current = next;
}
ASSERT_EQ(saw.size(), all.size());
ASSERT_EQ(saw, all);
}
{
ObjectStore::Transaction t;
for (set<ghobject_t>::iterator p = all.begin(); p != all.end(); ++p)
t.remove(cid, *p);
t.remove_collection(cid);
cerr << "Cleaning" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
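// ListEndTest: collection_list() with an explicit end bound -- the end object itself
// must not appear in the result (the bound is exclusive).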
TEST_P(StoreTest, ListEndTest)
{
int r;
coll_t cid(spg_t(pg_t(0, 1), shard_id_t(1)));
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
cerr << "Creating collection " << cid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
set<ghobject_t> all;
{
ObjectStore::Transaction t;
for (int i=0; i<200; ++i) {
string name("object_");
name += stringify(i);
ghobject_t hoid(hobject_t(sobject_t(name, CEPH_NOSNAP)),
ghobject_t::NO_GEN, shard_id_t(1));
hoid.hobj.pool = 1;
all.insert(hoid);
t.touch(cid, hoid);
cerr << "Creating object " << hoid << std::endl;
}
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ghobject_t end(hobject_t(sobject_t("object_100", CEPH_NOSNAP)),
ghobject_t::NO_GEN, shard_id_t(1));
end.hobj.pool = 1;
vector<ghobject_t> objects;
ghobject_t next;
int r = store->collection_list(ch, ghobject_t(), end, 500,
&objects, &next);
ASSERT_EQ(r, 0);
for (auto &p : objects) {
ASSERT_NE(p, end);
}
}
{
ObjectStore::Transaction t;
for (set<ghobject_t>::iterator p = all.begin(); p != all.end(); ++p)
t.remove(cid, *p);
t.remove_collection(cid);
cerr << "Cleaning" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
// Sort: sanity-check hobject_t/ghobject_t ordering -- pool is compared before name,
// and negative (temp) pools sort before positive ones.
TEST_P(StoreTest, Sort)
{
{
hobject_t a(sobject_t("a", CEPH_NOSNAP));
hobject_t b = a;
ASSERT_EQ(a, b);
b.oid.name = "b";
ASSERT_NE(a, b);
ASSERT_TRUE(a < b);
a.pool = 1;
b.pool = 2;
ASSERT_TRUE(a < b);
a.pool = 3;
ASSERT_TRUE(a > b);
}
{
ghobject_t a(hobject_t(sobject_t("a", CEPH_NOSNAP)));
ghobject_t b(hobject_t(sobject_t("b", CEPH_NOSNAP)));
a.hobj.pool = 1;
b.hobj.pool = 1;
ASSERT_TRUE(a < b);
a.hobj.pool = -3;
ASSERT_TRUE(a < b);
a.hobj.pool = 1;
b.hobj.pool = -3;
ASSERT_TRUE(a > b);
}
}
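// MultipoolListTest: each object is placed randomly in the pool or in its temp pool
// (-2 - poolid); listing the collection must still return all of them.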
TEST_P(StoreTest, MultipoolListTest)
{
int r;
int poolid = 4373;
coll_t cid = coll_t(spg_t(pg_t(0, poolid), shard_id_t::NO_SHARD));
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
cerr << "Creating collection " << cid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
set<ghobject_t> all, saw;
{
ObjectStore::Transaction t;
for (int i=0; i<200; ++i) {
string name("object_");
name += stringify(i);
ghobject_t hoid(hobject_t(sobject_t(name, CEPH_NOSNAP)));
if (rand() & 1)
hoid.hobj.pool = -2 - poolid;
else
hoid.hobj.pool = poolid;
all.insert(hoid);
t.touch(cid, hoid);
cerr << "Creating object " << hoid << std::endl;
}
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
vector<ghobject_t> objects;
ghobject_t next, current;
while (!next.is_max()) {
int r = store->collection_list(ch, current, ghobject_t::get_max(), 50,
&objects, &next);
ASSERT_EQ(r, 0);
cout << " got " << objects.size() << " next " << next << std::endl;
for (vector<ghobject_t>::iterator p = objects.begin(); p != objects.end();
++p) {
saw.insert(*p);
}
objects.clear();
current = next;
}
ASSERT_EQ(saw, all);
}
{
ObjectStore::Transaction t;
for (set<ghobject_t>::iterator p = all.begin(); p != all.end(); ++p)
t.remove(cid, *p);
t.remove_collection(cid);
cerr << "Cleaning" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
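// SimpleCloneTest: clone() must snapshot both data and xattrs. The test clones an
// object, then mutates the source with writes of various sizes and offsets and
// verifies source and clone independently. It also checks (except on filestore)
// that removing a non-empty collection asserts, both right after a store remount
// and when removes are pending in the same transaction.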
TEST_P(StoreTest, SimpleCloneTest)
{
int r;
coll_t cid;
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
cerr << "Creating collection " << cid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
ghobject_t hoid(hobject_t(sobject_t("Object 1", CEPH_NOSNAP),
"key", 123, -1, ""));
bufferlist small, large, xlarge, newdata, attr;
small.append("small");
large.append("large");
xlarge.append("xlarge");
{
ObjectStore::Transaction t;
t.touch(cid, hoid);
t.setattr(cid, hoid, "attr1", small);
t.setattr(cid, hoid, "attr2", large);
t.setattr(cid, hoid, "attr3", xlarge);
t.write(cid, hoid, 0, small.length(), small);
t.write(cid, hoid, 10, small.length(), small);
cerr << "Creating object and set attr " << hoid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
ghobject_t hoid2(hobject_t(sobject_t("Object 2", CEPH_NOSNAP),
"key", 123, -1, ""));
ghobject_t hoid3(hobject_t(sobject_t("Object 3", CEPH_NOSNAP)));
{
ObjectStore::Transaction t;
t.clone(cid, hoid, hoid2);
t.setattr(cid, hoid2, "attr2", small);
t.rmattr(cid, hoid2, "attr1");
t.write(cid, hoid, 10, large.length(), large);
t.setattr(cid, hoid, "attr1", large);
t.setattr(cid, hoid, "attr2", small);
cerr << "Clone object and rm attr" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
r = store->read(ch, hoid, 10, 5, newdata);
ASSERT_EQ(r, 5);
ASSERT_TRUE(bl_eq(large, newdata));
newdata.clear();
r = store->read(ch, hoid, 0, 5, newdata);
ASSERT_EQ(r, 5);
ASSERT_TRUE(bl_eq(small, newdata));
newdata.clear();
r = store->read(ch, hoid2, 10, 5, newdata);
ASSERT_EQ(r, 5);
ASSERT_TRUE(bl_eq(small, newdata));
r = store->getattr(ch, hoid2, "attr2", attr);
ASSERT_EQ(r, 0);
ASSERT_TRUE(bl_eq(small, attr));
attr.clear();
r = store->getattr(ch, hoid2, "attr3", attr);
ASSERT_EQ(r, 0);
ASSERT_TRUE(bl_eq(xlarge, attr));
attr.clear();
r = store->getattr(ch, hoid, "attr1", attr);
ASSERT_EQ(r, 0);
ASSERT_TRUE(bl_eq(large, attr));
}
{
ObjectStore::Transaction t;
t.remove(cid, hoid);
t.remove(cid, hoid2);
ASSERT_EQ(0, queue_transaction(store, ch, std::move(t)));
}
{
bufferlist final;
bufferptr p(16384);
memset(p.c_str(), 1, p.length());
bufferlist pl;
pl.append(p);
final.append(p);
ObjectStore::Transaction t;
t.write(cid, hoid, 0, pl.length(), pl);
t.clone(cid, hoid, hoid2);
bufferptr a(4096);
memset(a.c_str(), 2, a.length());
bufferlist al;
al.append(a);
final.append(a);
t.write(cid, hoid, pl.length(), a.length(), al);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
bufferlist rl;
ASSERT_EQ((int)final.length(),
store->read(ch, hoid, 0, final.length(), rl));
ASSERT_TRUE(bl_eq(rl, final));
}
{
ObjectStore::Transaction t;
t.remove(cid, hoid);
t.remove(cid, hoid2);
ASSERT_EQ(0, queue_transaction(store, ch, std::move(t)));
}
{
bufferlist final;
bufferptr p(16384);
memset(p.c_str(), 111, p.length());
bufferlist pl;
pl.append(p);
final.append(p);
ObjectStore::Transaction t;
t.write(cid, hoid, 0, pl.length(), pl);
t.clone(cid, hoid, hoid2);
bufferptr z(4096);
z.zero();
final.append(z);
bufferptr a(4096);
memset(a.c_str(), 112, a.length());
bufferlist al;
al.append(a);
final.append(a);
t.write(cid, hoid, pl.length() + z.length(), a.length(), al);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
bufferlist rl;
ASSERT_EQ((int)final.length(),
store->read(ch, hoid, 0, final.length(), rl));
ASSERT_TRUE(bl_eq(rl, final));
}
{
ObjectStore::Transaction t;
t.remove(cid, hoid);
t.remove(cid, hoid2);
ASSERT_EQ(0, queue_transaction(store, ch, std::move(t)));
}
{
bufferlist final;
bufferptr p(16000);
memset(p.c_str(), 5, p.length());
bufferlist pl;
pl.append(p);
final.append(p);
ObjectStore::Transaction t;
t.write(cid, hoid, 0, pl.length(), pl);
t.clone(cid, hoid, hoid2);
bufferptr z(1000);
z.zero();
final.append(z);
bufferptr a(8000);
memset(a.c_str(), 6, a.length());
bufferlist al;
al.append(a);
final.append(a);
t.write(cid, hoid, 17000, a.length(), al);
ASSERT_EQ(0, queue_transaction(store, ch, std::move(t)));
bufferlist rl;
ASSERT_EQ((int)final.length(),
store->read(ch, hoid, 0, final.length(), rl));
/*cout << "expected:\n";
final.hexdump(cout);
cout << "got:\n";
rl.hexdump(cout);*/
ASSERT_TRUE(bl_eq(rl, final));
}
{
ObjectStore::Transaction t;
t.remove(cid, hoid);
t.remove(cid, hoid2);
ASSERT_EQ(0, queue_transaction(store, ch, std::move(t)));
}
{
bufferptr p(1048576);
memset(p.c_str(), 3, p.length());
bufferlist pl;
pl.append(p);
ObjectStore::Transaction t;
t.write(cid, hoid, 0, pl.length(), pl);
t.clone(cid, hoid, hoid2);
bufferptr a(65536);
memset(a.c_str(), 4, a.length());
bufferlist al;
al.append(a);
t.write(cid, hoid, a.length(), a.length(), al);
ASSERT_EQ(0, queue_transaction(store, ch, std::move(t)));
bufferlist rl;
bufferlist final;
final.substr_of(pl, 0, al.length());
final.append(al);
bufferlist end;
end.substr_of(pl, al.length()*2, pl.length() - al.length()*2);
final.append(end);
ASSERT_EQ((int)final.length(),
store->read(ch, hoid, 0, final.length(), rl));
/*cout << "expected:\n";
final.hexdump(cout);
cout << "got:\n";
rl.hexdump(cout);*/
ASSERT_TRUE(bl_eq(rl, final));
}
{
ObjectStore::Transaction t;
t.remove(cid, hoid);
t.remove(cid, hoid2);
ASSERT_EQ(0, queue_transaction(store, ch, std::move(t)));
}
{
bufferptr p(65536);
memset(p.c_str(), 7, p.length());
bufferlist pl;
pl.append(p);
ObjectStore::Transaction t;
t.write(cid, hoid, 0, pl.length(), pl);
t.clone(cid, hoid, hoid2);
bufferptr a(4096);
memset(a.c_str(), 8, a.length());
bufferlist al;
al.append(a);
t.write(cid, hoid, 32768, a.length(), al);
ASSERT_EQ(0, queue_transaction(store, ch, std::move(t)));
bufferlist rl;
bufferlist final;
final.substr_of(pl, 0, 32768);
final.append(al);
bufferlist end;
end.substr_of(pl, final.length(), pl.length() - final.length());
final.append(end);
ASSERT_EQ((int)final.length(),
store->read(ch, hoid, 0, final.length(), rl));
/*cout << "expected:\n";
final.hexdump(cout);
cout << "got:\n";
rl.hexdump(cout);*/
ASSERT_TRUE(bl_eq(rl, final));
}
{
ObjectStore::Transaction t;
t.remove(cid, hoid);
t.remove(cid, hoid2);
ASSERT_EQ(0, queue_transaction(store, ch, std::move(t)));
}
{
bufferptr p(65536);
memset(p.c_str(), 9, p.length());
bufferlist pl;
pl.append(p);
ObjectStore::Transaction t;
t.write(cid, hoid, 0, pl.length(), pl);
t.clone(cid, hoid, hoid2);
bufferptr a(4096);
memset(a.c_str(), 10, a.length());
bufferlist al;
al.append(a);
t.write(cid, hoid, 33768, a.length(), al);
ASSERT_EQ(0, queue_transaction(store, ch, std::move(t)));
bufferlist rl;
bufferlist final;
final.substr_of(pl, 0, 33768);
final.append(al);
bufferlist end;
end.substr_of(pl, final.length(), pl.length() - final.length());
final.append(end);
ASSERT_EQ((int)final.length(),
store->read(ch, hoid, 0, final.length(), rl));
/*cout << "expected:\n";
final.hexdump(cout);
cout << "got:\n";
rl.hexdump(cout);*/
ASSERT_TRUE(bl_eq(rl, final));
}
//Unfortunately we need a workaround for filestore since the EXPECT_DEATH
// macro has potential issues when used in multithreaded environments.
//It works well for all stores but filestore for now.
//Setting gtest_death_test_style = "threadsafe" doesn't help either -
// the forked test process asserts on store folder presence.
//
if (string(GetParam()) != "filestore") {
//verify if non-empty collection is properly handled after store reload
ch.reset();
r = store->umount();
ASSERT_EQ(r, 0);
r = store->mount();
ASSERT_EQ(r, 0);
ch = store->open_collection(cid);
ObjectStore::Transaction t;
t.remove_collection(cid);
cerr << "Invalid rm coll" << std::endl;
PrCtl unset_dumpable;
EXPECT_DEATH(queue_transaction(store, ch, std::move(t)), "");
}
{
ObjectStore::Transaction t;
t.touch(cid, hoid3); //new record in db
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
//See comment above for "filestore" check explanation.
if (string(GetParam()) != "filestore") {
ObjectStore::Transaction t;
//verify if non-empty collection is properly handled when there are some pending removes and live records in db
cerr << "Invalid rm coll again" << std::endl;
ch.reset();
r = store->umount();
ASSERT_EQ(r, 0);
r = store->mount();
ASSERT_EQ(r, 0);
ch = store->open_collection(cid);
t.remove(cid, hoid);
t.remove(cid, hoid2);
t.remove_collection(cid);
PrCtl unset_dumpable;
EXPECT_DEATH(queue_transaction(store, ch, std::move(t)), "");
}
{
ObjectStore::Transaction t;
t.remove(cid, hoid);
t.remove(cid, hoid2);
t.remove(cid, hoid3);
t.remove_collection(cid);
cerr << "Cleaning" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
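// OmapSimple: set an omap header plus a couple of keys, read them back with
// omap_get(), and walk the omap iterator via seek_to_first() and lower_bound().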
TEST_P(StoreTest, OmapSimple)
{
int r;
coll_t cid;
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
cerr << "Creating collection " << cid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
ghobject_t hoid(hobject_t(sobject_t("omap_obj", CEPH_NOSNAP),
"key", 123, -1, ""));
bufferlist small;
small.append("small");
map<string,bufferlist> km;
km["foo"] = small;
km["bar"].append("asdfjkasdkjdfsjkafskjsfdj");
bufferlist header;
header.append("this is a header");
{
ObjectStore::Transaction t;
t.touch(cid, hoid);
t.omap_setkeys(cid, hoid, km);
t.omap_setheader(cid, hoid, header);
cerr << "Creating object and set omap " << hoid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
// get header, keys
{
bufferlist h;
map<string,bufferlist> r;
store->omap_get(ch, hoid, &h, &r);
ASSERT_TRUE(bl_eq(header, h));
ASSERT_EQ(r.size(), km.size());
cout << "r: " << r << std::endl;
}
// test iterator with seek_to_first
{
map<string,bufferlist> r;
ObjectMap::ObjectMapIterator iter = store->get_omap_iterator(ch, hoid);
for (iter->seek_to_first(); iter->valid(); iter->next(false)) {
r[iter->key()] = iter->value();
}
cout << "r: " << r << std::endl;
ASSERT_EQ(r.size(), km.size());
}
// test iterator with initial lower_bound
{
map<string,bufferlist> r;
ObjectMap::ObjectMapIterator iter = store->get_omap_iterator(ch, hoid);
for (iter->lower_bound(string()); iter->valid(); iter->next(false)) {
r[iter->key()] = iter->value();
}
cout << "r: " << r << std::endl;
ASSERT_EQ(r.size(), km.size());
}
{
ObjectStore::Transaction t;
t.remove(cid, hoid);
t.remove_collection(cid);
cerr << "Cleaning" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
TEST_P(StoreTest, OmapCloneTest)
{
int r;
coll_t cid;
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
cerr << "Creating collection " << cid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
ghobject_t hoid(hobject_t(sobject_t("Object 1", CEPH_NOSNAP),
"key", 123, -1, ""));
bufferlist small;
small.append("small");
map<string,bufferlist> km;
km["foo"] = small;
km["bar"].append("asdfjkasdkjdfsjkafskjsfdj");
bufferlist header;
header.append("this is a header");
{
ObjectStore::Transaction t;
t.touch(cid, hoid);
t.omap_setkeys(cid, hoid, km);
t.omap_setheader(cid, hoid, header);
cerr << "Creating object and set omap " << hoid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
ghobject_t hoid2(hobject_t(sobject_t("Object 2", CEPH_NOSNAP),
"key", 123, -1, ""));
{
ObjectStore::Transaction t;
t.clone(cid, hoid, hoid2);
cerr << "Clone object" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
map<string,bufferlist> r;
bufferlist h;
store->omap_get(ch, hoid2, &h, &r);
ASSERT_TRUE(bl_eq(header, h));
ASSERT_EQ(r.size(), km.size());
}
{
ObjectStore::Transaction t;
t.remove(cid, hoid);
t.remove(cid, hoid2);
t.remove_collection(cid);
cerr << "Cleaning" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
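// SimpleCloneRangeTest: clone_range() with matching source/destination offsets,
// first for a 5-byte range and then for a full 1 MiB range after truncating the
// source; the destination must end up the same size as the source.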
TEST_P(StoreTest, SimpleCloneRangeTest)
{
int r;
coll_t cid;
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
cerr << "Creating collection " << cid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
ghobject_t hoid(hobject_t(sobject_t("Object 1", CEPH_NOSNAP)));
hoid.hobj.pool = -1;
bufferlist small, newdata;
small.append("small");
{
ObjectStore::Transaction t;
t.write(cid, hoid, 10, 5, small);
cerr << "Creating object and write bl " << hoid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
ghobject_t hoid2(hobject_t(sobject_t("Object 2", CEPH_NOSNAP)));
hoid2.hobj.pool = -1;
{
ObjectStore::Transaction t;
t.clone_range(cid, hoid, hoid2, 10, 5, 10);
cerr << "Clone range object" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
r = store->read(ch, hoid2, 10, 5, newdata);
ASSERT_EQ(r, 5);
ASSERT_TRUE(bl_eq(small, newdata));
}
{
ObjectStore::Transaction t;
t.truncate(cid, hoid, 1024*1024);
t.clone_range(cid, hoid, hoid2, 0, 1024*1024, 0);
cerr << "Clone range object" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
struct stat stat, stat2;
r = store->stat(ch, hoid, &stat);
r = store->stat(ch, hoid2, &stat2);
ASSERT_EQ(stat.st_size, stat2.st_size);
ASSERT_EQ(1024*1024, stat2.st_size);
}
{
ObjectStore::Transaction t;
t.remove(cid, hoid);
t.remove(cid, hoid2);
t.remove_collection(cid);
cerr << "Cleaning" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
TEST_P(StoreTest, SimpleObjectLongnameTest)
{
int r;
coll_t cid;
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
cerr << "Creating collection " << cid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
ghobject_t hoid(hobject_t(sobject_t("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaObjectaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 1", CEPH_NOSNAP)));
{
ObjectStore::Transaction t;
t.touch(cid, hoid);
cerr << "Creating object " << hoid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
t.remove(cid, hoid);
t.remove_collection(cid);
cerr << "Cleaning" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
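// generate_long_name() builds ~500-character object names that hash onto only two
// values; LongnameSplitTest creates 320 such objects and then issues a
// collection_move_rename to force a split while long names are in play.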
ghobject_t generate_long_name(unsigned i)
{
stringstream name;
name << "object id " << i << " ";
for (unsigned j = 0; j < 500; ++j) name << 'a';
ghobject_t hoid(hobject_t(sobject_t(name.str(), CEPH_NOSNAP)));
hoid.hobj.set_hash(i % 2);
return hoid;
}
TEST_P(StoreTest, LongnameSplitTest)
{
int r;
coll_t cid;
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
cerr << "Creating collection " << cid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(0, r);
}
for (unsigned i = 0; i < 320; ++i) {
ObjectStore::Transaction t;
ghobject_t hoid = generate_long_name(i);
t.touch(cid, hoid);
cerr << "Creating object " << hoid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(0, r);
}
ghobject_t test_obj = generate_long_name(319);
ghobject_t test_obj_2 = test_obj;
test_obj_2.generation = 0;
{
ObjectStore::Transaction t;
// should cause a split
t.collection_move_rename(
cid, test_obj,
cid, test_obj_2);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(0, r);
}
for (unsigned i = 0; i < 319; ++i) {
ObjectStore::Transaction t;
ghobject_t hoid = generate_long_name(i);
t.remove(cid, hoid);
cerr << "Removing object " << hoid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(0, r);
}
{
ObjectStore::Transaction t;
t.remove(cid, test_obj_2);
t.remove_collection(cid);
cerr << "Cleaning" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(0, r);
}
}
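// ManyObjectTest: 2000 objects with ~500-character names. Every object is stat()ed,
// then the collection is listed both in a single INT_MAX call and in pages of 50;
// listing that starts from ghobject_t::get_max() must return nothing.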
TEST_P(StoreTest, ManyObjectTest)
{
int NUM_OBJS = 2000;
int r = 0;
coll_t cid;
string base = "";
for (int i = 0; i < 100; ++i) base.append("aaaaa");
set<ghobject_t> created;
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
for (int i = 0; i < NUM_OBJS; ++i) {
if (!(i % 5)) {
cerr << "Object " << i << std::endl;
}
ObjectStore::Transaction t;
char buf[100];
snprintf(buf, sizeof(buf), "%d", i);
ghobject_t hoid(hobject_t(sobject_t(string(buf) + base, CEPH_NOSNAP)));
t.touch(cid, hoid);
created.insert(hoid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
for (set<ghobject_t>::iterator i = created.begin();
i != created.end();
++i) {
struct stat buf;
ASSERT_TRUE(!store->stat(ch, *i, &buf));
}
set<ghobject_t> listed, listed2;
vector<ghobject_t> objects;
r = store->collection_list(ch, ghobject_t(), ghobject_t::get_max(), INT_MAX, &objects, 0);
ASSERT_EQ(r, 0);
cerr << "objects.size() is " << objects.size() << std::endl;
for (vector<ghobject_t> ::iterator i = objects.begin();
i != objects.end();
++i) {
listed.insert(*i);
ASSERT_TRUE(created.count(*i));
}
ASSERT_TRUE(listed.size() == created.size());
ghobject_t start, next;
objects.clear();
r = store->collection_list(
ch,
ghobject_t::get_max(),
ghobject_t::get_max(),
50,
&objects,
&next
);
ASSERT_EQ(r, 0);
ASSERT_TRUE(objects.empty());
objects.clear();
listed.clear();
ghobject_t start2, next2;
while (1) {
r = store->collection_list(ch, start, ghobject_t::get_max(),
50,
&objects,
&next);
ASSERT_TRUE(sorted(objects));
ASSERT_EQ(r, 0);
listed.insert(objects.begin(), objects.end());
if (objects.size() < 50) {
ASSERT_TRUE(next.is_max());
break;
}
objects.clear();
start = next;
}
cerr << "listed.size() is " << listed.size() << std::endl;
ASSERT_TRUE(listed.size() == created.size());
if (listed2.size()) {
ASSERT_EQ(listed.size(), listed2.size());
}
for (set<ghobject_t>::iterator i = listed.begin();
i != listed.end();
++i) {
ASSERT_TRUE(created.count(*i));
}
for (set<ghobject_t>::iterator i = created.begin();
i != created.end();
++i) {
ObjectStore::Transaction t;
t.remove(cid, *i);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
cerr << "cleaning up" << std::endl;
{
ObjectStore::Transaction t;
t.remove_collection(cid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
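// Object generators for the synthetic workloads below: MixedGenerator produces
// sequentially numbered objects, every other one with a ~300-character name, with
// randomized snapshot ids and hash values spread across a few buckets.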
class ObjectGenerator
{
public:
virtual ghobject_t create_object(gen_type *gen) = 0;
virtual ~ObjectGenerator() {}
};
class MixedGenerator : public ObjectGenerator
{
public:
unsigned seq;
int64_t poolid;
explicit MixedGenerator(int64_t p) : seq(0), poolid(p) {}
ghobject_t create_object(gen_type *gen) override
{
char buf[100];
snprintf(buf, sizeof(buf), "OBJ_%u", seq);
string name(buf);
if (seq % 2) {
for (unsigned i = 0; i < 300; ++i) {
name.push_back('a');
}
}
++seq;
return ghobject_t(
hobject_t(
name, string(), rand() & 2 ? CEPH_NOSNAP : rand(),
(((seq / 1024) % 2) * 0xF00 ) +
(seq & 0xFF),
poolid, ""));
}
};
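// SyntheticWorkloadState drives randomized workloads against the store. It keeps an
// in-memory model of every object's expected data and attrs (the `contents` map),
// issues random touch/write/zero/truncate/clone/clone_range/stash/setattrs/rmattr/
// unlink operations, bounds the number of in-flight transactions, and re-reads each
// object in the on_applied callbacks to compare the store against the model.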
class SyntheticWorkloadState
{
struct Object {
bufferlist data;
map<string, bufferlist> attrs;
};
public:
static const unsigned max_in_flight = 16;
static const unsigned max_objects = 3000;
static const unsigned max_attr_size = 5;
static const unsigned max_attr_name_len = 100;
static const unsigned max_attr_value_len = 1024 * 64;
coll_t cid;
unsigned write_alignment;
unsigned max_object_len, max_write_len;
unsigned in_flight;
map<ghobject_t, Object> contents;
set<ghobject_t> available_objects;
set<ghobject_t> in_flight_objects;
ObjectGenerator *object_gen;
gen_type *rng;
ObjectStore *store;
ObjectStore::CollectionHandle ch;
Mutex lock;
Cond cond;
struct EnterExit {
const char *msg;
explicit EnterExit(const char *m) : msg(m)
{
//cout << pthread_self() << " enter " << msg << std::endl;
}
~EnterExit()
{
//cout << pthread_self() << " exit " << msg << std::endl;
}
};
class C_SyntheticOnReadable : public Context
{
public:
SyntheticWorkloadState *state;
ghobject_t hoid;
C_SyntheticOnReadable(SyntheticWorkloadState *state, ghobject_t hoid)
: state(state), hoid(hoid) {}
void finish(int r) override
{
Mutex::Locker locker(state->lock);
EnterExit ee("onreadable finish");
ASSERT_TRUE(state->in_flight_objects.count(hoid));
ASSERT_EQ(r, 0);
state->in_flight_objects.erase(hoid);
if (state->contents.count(hoid))
state->available_objects.insert(hoid);
--(state->in_flight);
state->cond.Signal();
bufferlist r2;
r = state->store->read(state->ch, hoid, 0, state->contents[hoid].data.length(), r2);
assert(bl_eq(state->contents[hoid].data, r2));
state->cond.Signal();
}
};
class C_SyntheticOnStash : public Context
{
public:
SyntheticWorkloadState *state;
ghobject_t oid, noid;
C_SyntheticOnStash(SyntheticWorkloadState *state,
ghobject_t oid, ghobject_t noid)
: state(state), oid(oid), noid(noid) {}
void finish(int r) override
{
Mutex::Locker locker(state->lock);
EnterExit ee("stash finish");
ASSERT_TRUE(state->in_flight_objects.count(oid));
ASSERT_EQ(r, 0);
state->in_flight_objects.erase(oid);
if (state->contents.count(noid))
state->available_objects.insert(noid);
--(state->in_flight);
bufferlist r2;
r = state->store->read(
state->ch, noid, 0,
state->contents[noid].data.length(), r2);
assert(bl_eq(state->contents[noid].data, r2));
state->cond.Signal();
}
};
class C_SyntheticOnClone : public Context
{
public:
SyntheticWorkloadState *state;
ghobject_t oid, noid;
C_SyntheticOnClone(SyntheticWorkloadState *state,
ghobject_t oid, ghobject_t noid)
: state(state), oid(oid), noid(noid) {}
void finish(int r) override
{
Mutex::Locker locker(state->lock);
EnterExit ee("clone finish");
ASSERT_TRUE(state->in_flight_objects.count(oid));
ASSERT_EQ(r, 0);
state->in_flight_objects.erase(oid);
if (state->contents.count(oid))
state->available_objects.insert(oid);
if (state->contents.count(noid))
state->available_objects.insert(noid);
--(state->in_flight);
bufferlist r2;
r = state->store->read(state->ch, noid, 0, state->contents[noid].data.length(), r2);
assert(bl_eq(state->contents[noid].data, r2));
state->cond.Signal();
}
};
static void filled_byte_array(bufferlist& bl, size_t size)
{
static const char alphanum[] = "0123456789"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz";
if (!size) {
return;
}
bufferptr bp(size);
for (unsigned int i = 0; i < size - 1; i++) {
// severely limit entropy so we can compress...
bp[i] = alphanum[rand() % 10]; //(sizeof(alphanum) - 1)];
}
bp[size - 1] = '\0';
bl.append(bp);
}
SyntheticWorkloadState(ObjectStore *store,
ObjectGenerator *gen,
gen_type *rng,
coll_t cid,
unsigned max_size,
unsigned max_write,
unsigned alignment)
: cid(cid), write_alignment(alignment), max_object_len(max_size),
max_write_len(max_write), in_flight(0), object_gen(gen),
rng(rng), store(store),
lock("State lock") {}
int init()
{
ObjectStore::Transaction t;
ch = store->create_new_collection(cid);
t.create_collection(cid, 0);
return queue_transaction(store, ch, std::move(t));
}
void shutdown()
{
while (1) {
vector<ghobject_t> objects;
int r = store->collection_list(ch, ghobject_t(), ghobject_t::get_max(),
10, &objects, 0);
assert(r >= 0);
if (objects.empty())
break;
ObjectStore::Transaction t;
for (vector<ghobject_t>::iterator p = objects.begin();
p != objects.end(); ++p) {
t.remove(cid, *p);
}
queue_transaction(store, ch, std::move(t));
}
ObjectStore::Transaction t;
t.remove_collection(cid);
queue_transaction(store, ch, std::move(t));
}
void statfs(store_statfs_t& stat)
{
store->statfs(&stat);
}
ghobject_t get_uniform_random_object()
{
while (in_flight >= max_in_flight || available_objects.empty())
cond.Wait(lock);
boost::uniform_int<> choose(0, available_objects.size() - 1);
int index = choose(*rng);
set<ghobject_t>::iterator i = available_objects.begin();
for ( ; index > 0; --index, ++i) ;
ghobject_t ret = *i;
return ret;
}
void wait_for_ready()
{
while (in_flight >= max_in_flight)
cond.Wait(lock);
}
void wait_for_done()
{
Mutex::Locker locker(lock);
while (in_flight)
cond.Wait(lock);
}
bool can_create()
{
return (available_objects.size() + in_flight_objects.size()) < max_objects;
}
bool can_unlink()
{
return (available_objects.size() + in_flight_objects.size()) > 0;
}
unsigned get_random_alloc_hints()
{
unsigned f = 0;
{
boost::uniform_int<> u(0, 3);
switch (u(*rng)) {
case 1:
f |= CEPH_OSD_ALLOC_HINT_FLAG_SEQUENTIAL_WRITE;
break;
case 2:
f |= CEPH_OSD_ALLOC_HINT_FLAG_RANDOM_WRITE;
break;
}
}
{
boost::uniform_int<> u(0, 3);
switch (u(*rng)) {
case 1:
f |= CEPH_OSD_ALLOC_HINT_FLAG_SEQUENTIAL_READ;
break;
case 2:
f |= CEPH_OSD_ALLOC_HINT_FLAG_RANDOM_READ;
break;
}
}
{
// append_only, immutable
boost::uniform_int<> u(0, 4);
f |= u(*rng) << 4;
}
{
boost::uniform_int<> u(0, 3);
switch (u(*rng)) {
case 1:
f |= CEPH_OSD_ALLOC_HINT_FLAG_SHORTLIVED;
break;
case 2:
f |= CEPH_OSD_ALLOC_HINT_FLAG_LONGLIVED;
break;
}
}
{
boost::uniform_int<> u(0, 3);
switch (u(*rng)) {
case 1:
f |= CEPH_OSD_ALLOC_HINT_FLAG_COMPRESSIBLE;
break;
case 2:
f |= CEPH_OSD_ALLOC_HINT_FLAG_INCOMPRESSIBLE;
break;
}
}
return f;
}
int touch()
{
Mutex::Locker locker(lock);
EnterExit ee("touch");
if (!can_create())
return -ENOSPC;
wait_for_ready();
ghobject_t new_obj = object_gen->create_object(rng);
available_objects.erase(new_obj);
ObjectStore::Transaction t;
t.touch(cid, new_obj);
boost::uniform_int<> u(17, 22);
boost::uniform_int<> v(12, 17);
t.set_alloc_hint(cid, new_obj,
1ull << u(*rng),
1ull << v(*rng),
get_random_alloc_hints());
++in_flight;
in_flight_objects.insert(new_obj);
if (!contents.count(new_obj))
contents[new_obj] = Object();
t.register_on_applied(new C_SyntheticOnReadable(this, new_obj));
int status = store->queue_transaction(ch, std::move(t));
return status;
}
int stash()
{
Mutex::Locker locker(lock);
EnterExit ee("stash");
if (!can_unlink())
return -ENOENT;
if (!can_create())
return -ENOSPC;
wait_for_ready();
ghobject_t old_obj;
int max = 20;
do {
old_obj = get_uniform_random_object();
} while (--max && !contents[old_obj].data.length());
available_objects.erase(old_obj);
ghobject_t new_obj = old_obj;
new_obj.generation++;
available_objects.erase(new_obj);
ObjectStore::Transaction t;
t.collection_move_rename(cid, old_obj, cid, new_obj);
++in_flight;
in_flight_objects.insert(old_obj);
contents[new_obj].attrs = contents[old_obj].attrs;
contents[new_obj].data = contents[old_obj].data;
contents.erase(old_obj);
t.register_on_applied(new C_SyntheticOnStash(this, old_obj, new_obj));
int status = store->queue_transaction(ch, std::move(t));
return status;
}
int clone()
{
Mutex::Locker locker(lock);
EnterExit ee("clone");
if (!can_unlink())
return -ENOENT;
if (!can_create())
return -ENOSPC;
wait_for_ready();
ghobject_t old_obj;
int max = 20;
do {
old_obj = get_uniform_random_object();
} while (--max && !contents[old_obj].data.length());
available_objects.erase(old_obj);
ghobject_t new_obj = object_gen->create_object(rng);
// make the hash match
new_obj.hobj.set_hash(old_obj.hobj.get_hash());
available_objects.erase(new_obj);
ObjectStore::Transaction t;
t.clone(cid, old_obj, new_obj);
++in_flight;
in_flight_objects.insert(old_obj);
contents[new_obj].attrs = contents[old_obj].attrs;
contents[new_obj].data = contents[old_obj].data;
t.register_on_applied(new C_SyntheticOnClone(this, old_obj, new_obj));
int status = store->queue_transaction(ch, std::move(t));
return status;
}
int clone_range()
{
Mutex::Locker locker(lock);
EnterExit ee("clone_range");
if (!can_unlink())
return -ENOENT;
if (!can_create())
return -ENOSPC;
wait_for_ready();
ghobject_t old_obj;
int max = 20;
do {
old_obj = get_uniform_random_object();
} while (--max && !contents[old_obj].data.length());
bufferlist &srcdata = contents[old_obj].data;
if (srcdata.length() == 0) {
return 0;
}
available_objects.erase(old_obj);
ghobject_t new_obj = get_uniform_random_object();
available_objects.erase(new_obj);
boost::uniform_int<> u1(0, max_object_len - max_write_len);
boost::uniform_int<> u2(0, max_write_len);
uint64_t srcoff = u1(*rng);
// make src and dst offsets match, since that's what the osd does
uint64_t dstoff = srcoff; //u1(*rng);
uint64_t len = u2(*rng);
if (write_alignment) {
srcoff = round_up_to(srcoff, write_alignment);
dstoff = round_up_to(dstoff, write_alignment);
len = round_up_to(len, write_alignment);
}
if (srcoff > srcdata.length() - 1) {
srcoff = srcdata.length() - 1;
}
if (srcoff + len > srcdata.length()) {
len = srcdata.length() - srcoff;
}
if (0)
cout << __func__ << " from " << srcoff << "~" << len
<< " (size " << srcdata.length() << ") to "
<< dstoff << "~" << len << std::endl;
ObjectStore::Transaction t;
t.clone_range(cid, old_obj, new_obj, srcoff, len, dstoff);
++in_flight;
in_flight_objects.insert(old_obj);
bufferlist bl;
if (srcoff < srcdata.length()) {
if (srcoff + len > srcdata.length()) {
bl.substr_of(srcdata, srcoff, srcdata.length() - srcoff);
} else {
bl.substr_of(srcdata, srcoff, len);
}
}
bufferlist& dstdata = contents[new_obj].data;
if (dstdata.length() <= dstoff) {
if (bl.length() > 0) {
dstdata.append_zero(dstoff - dstdata.length());
dstdata.append(bl);
}
} else {
bufferlist value;
assert(dstdata.length() > dstoff);
dstdata.copy(0, dstoff, value);
value.append(bl);
if (value.length() < dstdata.length())
dstdata.copy(value.length(),
dstdata.length() - value.length(), value);
value.swap(dstdata);
}
t.register_on_applied(new C_SyntheticOnClone(this, old_obj, new_obj));
int status = store->queue_transaction(ch, std::move(t));
return status;
}
int write()
{
Mutex::Locker locker(lock);
EnterExit ee("write");
if (!can_unlink())
return -ENOENT;
wait_for_ready();
ghobject_t new_obj = get_uniform_random_object();
available_objects.erase(new_obj);
ObjectStore::Transaction t;
boost::uniform_int<> u1(0, max_object_len - max_write_len);
boost::uniform_int<> u2(0, max_write_len);
uint64_t offset = u1(*rng);
uint64_t len = u2(*rng);
bufferlist bl;
if (write_alignment) {
offset = round_up_to(offset, write_alignment);
len = round_up_to(len, write_alignment);
}
filled_byte_array(bl, len);
bufferlist& data = contents[new_obj].data;
if (data.length() <= offset) {
if (len > 0) {
data.append_zero(offset-data.length());
data.append(bl);
}
} else {
bufferlist value;
assert(data.length() > offset);
data.copy(0, offset, value);
value.append(bl);
if (value.length() < data.length())
data.copy(value.length(),
data.length()-value.length(), value);
value.swap(data);
}
t.write(cid, new_obj, offset, len, bl);
++in_flight;
in_flight_objects.insert(new_obj);
t.register_on_applied(new C_SyntheticOnReadable(this, new_obj));
int status = store->queue_transaction(ch, std::move(t));
return status;
}
int truncate()
{
Mutex::Locker locker(lock);
EnterExit ee("truncate");
if (!can_unlink())
return -ENOENT;
wait_for_ready();
ghobject_t obj = get_uniform_random_object();
available_objects.erase(obj);
ObjectStore::Transaction t;
boost::uniform_int<> choose(0, max_object_len);
size_t len = choose(*rng);
if (write_alignment) {
len = round_up_to(len, write_alignment);
}
t.truncate(cid, obj, len);
++in_flight;
in_flight_objects.insert(obj);
bufferlist& data = contents[obj].data;
if (data.length() <= len) {
data.append_zero(len - data.length());
} else {
bufferlist bl;
data.copy(0, len, bl);
bl.swap(data);
}
t.register_on_applied(new C_SyntheticOnReadable(this, obj));
int status = store->queue_transaction(ch, std::move(t));
return status;
}
int zero()
{
Mutex::Locker locker(lock);
EnterExit ee("zero");
if (!can_unlink())
return -ENOENT;
wait_for_ready();
ghobject_t new_obj = get_uniform_random_object();
available_objects.erase(new_obj);
ObjectStore::Transaction t;
boost::uniform_int<> u1(0, max_object_len - max_write_len);
boost::uniform_int<> u2(0, max_write_len);
uint64_t offset = u1(*rng);
uint64_t len = u2(*rng);
if (write_alignment) {
offset = round_up_to(offset, write_alignment);
len = round_up_to(len, write_alignment);
}
if (len > 0) {
auto& data = contents[new_obj].data;
if (data.length() < offset + len) {
data.append_zero(offset+len-data.length());
}
bufferlist n;
n.substr_of(data, 0, offset);
n.append_zero(len);
if (data.length() > offset + len)
data.copy(offset + len, data.length() - offset - len, n);
data.swap(n);
}
t.zero(cid, new_obj, offset, len);
++in_flight;
in_flight_objects.insert(new_obj);
t.register_on_applied(new C_SyntheticOnReadable(this, new_obj));
int status = store->queue_transaction(ch, std::move(t));
return status;
}
void read()
{
EnterExit ee("read");
boost::uniform_int<> u1(0, max_object_len/2);
boost::uniform_int<> u2(0, max_object_len);
uint64_t offset = u1(*rng);
uint64_t len = u2(*rng);
if (offset > len)
swap(offset, len);
ghobject_t obj;
bufferlist expected;
int r;
{
Mutex::Locker locker(lock);
EnterExit ee("read locked");
if (!can_unlink())
return ;
wait_for_ready();
obj = get_uniform_random_object();
expected = contents[obj].data;
}
bufferlist bl, result;
if (0) cout << " obj " << obj
<< " size " << expected.length()
<< " offset " << offset
<< " len " << len << std::endl;
r = store->read(ch, obj, offset, len, result);
if (offset >= expected.length()) {
ASSERT_EQ(r, 0);
} else {
size_t max_len = expected.length() - offset;
if (len > max_len)
len = max_len;
assert(len == result.length());
ASSERT_EQ(len, result.length());
expected.copy(offset, len, bl);
ASSERT_EQ(r, (int)len);
ASSERT_TRUE(bl_eq(bl, result));
}
}
int setattrs()
{
Mutex::Locker locker(lock);
EnterExit ee("setattrs");
if (!can_unlink())
return -ENOENT;
wait_for_ready();
ghobject_t obj = get_uniform_random_object();
available_objects.erase(obj);
ObjectStore::Transaction t;
boost::uniform_int<> u0(1, max_attr_size);
boost::uniform_int<> u1(4, max_attr_name_len);
boost::uniform_int<> u2(4, max_attr_value_len);
boost::uniform_int<> u3(0, 100);
uint64_t size = u0(*rng);
uint64_t name_len;
map<string, bufferlist> attrs;
set<string> keys;
for (map<string, bufferlist>::iterator it = contents[obj].attrs.begin();
it != contents[obj].attrs.end(); ++it)
keys.insert(it->first);
while (size--) {
bufferlist name, value;
uint64_t get_exist = u3(*rng);
uint64_t value_len = u2(*rng);
filled_byte_array(value, value_len);
if (get_exist < 50 && keys.size()) {
set<string>::iterator k = keys.begin();
attrs[*k] = value;
contents[obj].attrs[*k] = value;
keys.erase(k);
} else {
name_len = u1(*rng);
filled_byte_array(name, name_len);
attrs[name.c_str()] = value;
contents[obj].attrs[name.c_str()] = value;
}
}
t.setattrs(cid, obj, attrs);
++in_flight;
in_flight_objects.insert(obj);
t.register_on_applied(new C_SyntheticOnReadable(this, obj));
int status = store->queue_transaction(ch, std::move(t));
return status;
}
void getattrs()
{
EnterExit ee("getattrs");
ghobject_t obj;
map<string, bufferlist> expected;
{
Mutex::Locker locker(lock);
EnterExit ee("getattrs locked");
if (!can_unlink())
return ;
wait_for_ready();
int retry = 10;
do {
obj = get_uniform_random_object();
if (!--retry)
return ;
} while (contents[obj].attrs.empty());
expected = contents[obj].attrs;
}
map<string, bufferlist> attrs;
int r = store->getattrs(ch, obj, attrs);
ASSERT_TRUE(r == 0);
ASSERT_TRUE(attrs.size() == expected.size());
for (map<string, bufferlist>::iterator it = expected.begin();
it != expected.end(); ++it) {
ASSERT_TRUE(bl_eq(attrs[it->first], it->second));
}
}
void getattr()
{
EnterExit ee("getattr");
ghobject_t obj;
int r;
int retry;
map<string, bufferlist> expected;
{
Mutex::Locker locker(lock);
EnterExit ee("getattr locked");
if (!can_unlink())
return ;
wait_for_ready();
retry = 10;
do {
obj = get_uniform_random_object();
if (!--retry)
return ;
} while (contents[obj].attrs.empty());
expected = contents[obj].attrs;
}
boost::uniform_int<> u(0, expected.size()-1);
retry = u(*rng);
map<string, bufferlist>::iterator it = expected.begin();
while (retry) {
retry--;
++it;
}
bufferlist bl;
r = store->getattr(ch, obj, it->first, bl);
ASSERT_EQ(r, 0);
ASSERT_TRUE(bl_eq(it->second, bl));
}
int rmattr()
{
Mutex::Locker locker(lock);
EnterExit ee("rmattr");
if (!can_unlink())
return -ENOENT;
wait_for_ready();
ghobject_t obj;
int retry = 10;
do {
obj = get_uniform_random_object();
if (!--retry)
return 0;
} while (contents[obj].attrs.empty());
boost::uniform_int<> u(0, contents[obj].attrs.size()-1);
retry = u(*rng);
map<string, bufferlist>::iterator it = contents[obj].attrs.begin();
while (retry) {
retry--;
++it;
}
available_objects.erase(obj);
ObjectStore::Transaction t;
t.rmattr(cid, obj, it->first);
contents[obj].attrs.erase(it->first);
++in_flight;
in_flight_objects.insert(obj);
t.register_on_applied(new C_SyntheticOnReadable(this, obj));
int status = store->queue_transaction(ch, std::move(t));
return status;
}
void fsck(bool deep)
{
Mutex::Locker locker(lock);
EnterExit ee("fsck");
while (in_flight)
cond.Wait(lock);
ch.reset();
store->umount();
int r = store->fsck(deep);
assert(r == 0 || r == -EOPNOTSUPP);
store->mount();
ch = store->open_collection(cid);
}
void scan()
{
Mutex::Locker locker(lock);
EnterExit ee("scan");
while (in_flight)
cond.Wait(lock);
vector<ghobject_t> objects;
set<ghobject_t> objects_set, objects_set2;
ghobject_t next, current;
while (1) {
//cerr << "scanning..." << std::endl;
int r = store->collection_list(ch, current, ghobject_t::get_max(), 100,
&objects, &next);
ASSERT_EQ(r, 0);
ASSERT_TRUE(sorted(objects));
objects_set.insert(objects.begin(), objects.end());
objects.clear();
if (next.is_max()) break;
current = next;
}
if (objects_set.size() != available_objects.size()) {
for (set<ghobject_t>::iterator p = objects_set.begin();
p != objects_set.end();
++p)
if (available_objects.count(*p) == 0) {
cerr << "+ " << *p << std::endl;
ceph_abort();
}
for (set<ghobject_t>::iterator p = available_objects.begin();
p != available_objects.end();
++p)
if (objects_set.count(*p) == 0)
cerr << "- " << *p << std::endl;
//cerr << " objects_set: " << objects_set << std::endl;
//cerr << " available_set: " << available_objects << std::endl;
assert(0 == "badness");
}
ASSERT_EQ(objects_set.size(), available_objects.size());
for (set<ghobject_t>::iterator i = objects_set.begin();
i != objects_set.end();
++i) {
ASSERT_GT(available_objects.count(*i), (unsigned)0);
}
int r = store->collection_list(ch, ghobject_t(), ghobject_t::get_max(),
INT_MAX, &objects, 0);
ASSERT_EQ(r, 0);
objects_set2.insert(objects.begin(), objects.end());
ASSERT_EQ(objects_set2.size(), available_objects.size());
for (set<ghobject_t>::iterator i = objects_set2.begin();
i != objects_set2.end();
++i) {
ASSERT_GT(available_objects.count(*i), (unsigned)0);
if (available_objects.count(*i) == 0) {
cerr << "+ " << *i << std::endl;
}
}
}
void stat()
{
EnterExit ee("stat");
ghobject_t hoid;
uint64_t expected;
{
Mutex::Locker locker(lock);
EnterExit ee("stat lock1");
if (!can_unlink())
return ;
hoid = get_uniform_random_object();
in_flight_objects.insert(hoid);
available_objects.erase(hoid);
++in_flight;
expected = contents[hoid].data.length();
}
struct stat buf;
int r = store->stat(ch, hoid, &buf);
ASSERT_EQ(0, r);
assert((uint64_t)buf.st_size == expected);
ASSERT_TRUE((uint64_t)buf.st_size == expected);
{
Mutex::Locker locker(lock);
EnterExit ee("stat lock2");
--in_flight;
cond.Signal();
in_flight_objects.erase(hoid);
available_objects.insert(hoid);
}
}
int unlink()
{
Mutex::Locker locker(lock);
EnterExit ee("unlink");
if (!can_unlink())
return -ENOENT;
ghobject_t to_remove = get_uniform_random_object();
ObjectStore::Transaction t;
t.remove(cid, to_remove);
++in_flight;
available_objects.erase(to_remove);
in_flight_objects.insert(to_remove);
contents.erase(to_remove);
t.register_on_applied(new C_SyntheticOnReadable(this, to_remove));
int status = store->queue_transaction(ch, std::move(t));
return status;
}
void print_internal_state()
{
Mutex::Locker locker(lock);
cerr << "available_objects: " << available_objects.size()
<< " in_flight_objects: " << in_flight_objects.size()
<< " total objects: " << in_flight_objects.size() + available_objects.size()
<< " in_flight " << in_flight << std::endl;
}
};
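// doSyntheticTest seeds num_ops/10 objects, then runs num_ops random operations.
// The weights come from a uniform draw in [0, 999]: roughly 25% write, 20% read,
// 15% stash, 10% zero, 10% truncate, 5% each for unlink/clone/clone_range,
// 2% stat, 2.7% scan, and ~0.2% fsck.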
void StoreTest::doSyntheticTest(
int num_ops,
uint64_t max_obj, uint64_t max_wr, uint64_t align)
{
MixedGenerator gen(555);
gen_type rng(time(NULL));
coll_t cid(spg_t(pg_t(0,555), shard_id_t::NO_SHARD));
SetVal(g_conf, "bluestore_fsck_on_mount", "false");
SetVal(g_conf, "bluestore_fsck_on_umount", "false");
g_ceph_context->_conf->apply_changes(NULL);
SyntheticWorkloadState test_obj(store.get(), &gen, &rng, cid,
max_obj, max_wr, align);
test_obj.init();
for (int i = 0; i < num_ops/10; ++i) {
if (!(i % 500)) cerr << "seeding object " << i << std::endl;
test_obj.touch();
}
for (int i = 0; i < num_ops; ++i) {
if (!(i % 1000)) {
cerr << "Op " << i << std::endl;
test_obj.print_internal_state();
}
boost::uniform_int<> true_false(0, 999);
int val = true_false(rng);
if (val > 998) {
test_obj.fsck(true);
} else if (val > 997) {
test_obj.fsck(false);
} else if (val > 970) {
test_obj.scan();
} else if (val > 950) {
test_obj.stat();
} else if (val > 850) {
test_obj.zero();
} else if (val > 800) {
test_obj.unlink();
} else if (val > 550) {
test_obj.write();
} else if (val > 500) {
test_obj.clone();
} else if (val > 450) {
test_obj.clone_range();
} else if (val > 300) {
test_obj.stash();
} else if (val > 100) {
test_obj.read();
} else {
test_obj.truncate();
}
}
test_obj.wait_for_done();
test_obj.shutdown();
}
TEST_P(StoreTest, Synthetic)
{
doSyntheticTest(10000, 400*1024, 40*1024, 0);
}
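// The SyntheticMatrix* tests below are bluestore-only: do_matrix() runs
// doSyntheticTest for each combination of the listed config values, with
// bluestore_min_alloc_size required as the first row so it can be applied
// via StartDeferred before the store is set up.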
TEST_P(StoreTestSpecificAUSize, SyntheticMatrixSharding)
{
if (string(GetParam()) != "bluestore")
return;
const char *m[][10] = {
{ "bluestore_min_alloc_size", "4096", 0 }, // must be the first!
{ "num_ops", "50000", 0 },
{ "max_write", "65536", 0 },
{ "max_size", "262144", 0 },
{ "alignment", "4096", 0 },
{ "bluestore_max_blob_size", "65536", 0 },
{ "bluestore_extent_map_shard_min_size", "60", 0 },
{ "bluestore_extent_map_shard_max_size", "300", 0 },
{ "bluestore_extent_map_shard_target_size", "150", 0 },
{ "bluestore_default_buffered_read", "true", 0 },
{ "bluestore_default_buffered_write", "true", 0 },
{ 0 },
};
do_matrix(m, std::bind(&StoreTest::doSyntheticTest, this, _1, _2, _3, _4));
}
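// ZipperPatternSharded (bluestore-only, 4 KiB min_alloc_size): 1000 4 KiB zero
// writes at even 8 KiB strides, then 1000 more shifted by one byte, producing
// interleaved, partially overlapping extents (the "zipper" pattern).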
TEST_P(StoreTestSpecificAUSize, ZipperPatternSharded)
{
if(string(GetParam()) != "bluestore")
return;
StartDeferred(4096);
int r;
coll_t cid;
ghobject_t a(hobject_t(sobject_t("Object 1", CEPH_NOSNAP)));
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
cerr << "Creating collection " << cid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
bufferlist bl;
int len = 4096;
bufferptr bp(len);
bp.zero();
bl.append(bp);
for (int i=0; i<1000; ++i) {
ObjectStore::Transaction t;
t.write(cid, a, i*2*len, len, bl, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
for (int i=0; i<1000; ++i) {
ObjectStore::Transaction t;
t.write(cid, a, i*2*len + 1, len, bl, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
t.remove(cid, a);
t.remove_collection(cid);
cerr << "Cleaning" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
TEST_P(StoreTestSpecificAUSize, SyntheticMatrixCsumAlgorithm)
{
if (string(GetParam()) != "bluestore")
return;
const char *m[][10] = {
{ "bluestore_min_alloc_size", "65536", 0 }, // must be the first!
{ "max_write", "65536", 0 },
{ "max_size", "1048576", 0 },
{ "alignment", "16", 0 },
{
"bluestore_csum_type", "crc32c", "crc32c_16", "crc32c_8", "xxhash32",
"xxhash64", "none", 0
},
{ "bluestore_default_buffered_write", "false", 0 },
{ 0 },
};
do_matrix(m, std::bind(&StoreTest::doSyntheticTest, this, _1, _2, _3, _4));
}
TEST_P(StoreTestSpecificAUSize, SyntheticMatrixCsumVsCompression)
{
if (string(GetParam()) != "bluestore")
return;
const char *m[][10] = {
{ "bluestore_min_alloc_size", "4096", "16384", 0 }, //to be the first!
{ "max_write", "131072", 0 },
{ "max_size", "262144", 0 },
{ "alignment", "512", 0 },
{ "bluestore_compression_mode", "force", 0},
{ "bluestore_compression_algorithm", "snappy", "zlib", 0 },
{ "bluestore_csum_type", "crc32c", 0 },
{ "bluestore_default_buffered_read", "true", "false", 0 },
{ "bluestore_default_buffered_write", "true", "false", 0 },
{ "bluestore_sync_submit_transaction", "false", 0 },
{ 0 },
};
do_matrix(m, std::bind(&StoreTest::doSyntheticTest, this, _1, _2, _3, _4));
}
TEST_P(StoreTestSpecificAUSize, SyntheticMatrixCompression)
{
if (string(GetParam()) != "bluestore")
return;
const char *m[][10] = {
{ "bluestore_min_alloc_size", "4096", "65536", 0 }, // to be the first!
{ "max_write", "1048576", 0 },
{ "max_size", "4194304", 0 },
{ "alignment", "65536", 0 },
{ "bluestore_compression_mode", "force", "aggressive", "passive", "none", 0},
{ "bluestore_default_buffered_write", "false", 0 },
{ "bluestore_sync_submit_transaction", "true", 0 },
{ 0 },
};
do_matrix(m, std::bind(&StoreTest::doSyntheticTest, this, _1, _2, _3, _4));
}
TEST_P(StoreTestSpecificAUSize, SyntheticMatrixCompressionAlgorithm)
{
if (string(GetParam()) != "bluestore")
return;
const char *m[][10] = {
{ "bluestore_min_alloc_size", "4096", "65536", 0 }, // to be the first!
{ "max_write", "1048576", 0 },
{ "max_size", "4194304", 0 },
{ "alignment", "65536", 0 },
{ "bluestore_compression_algorithm", "zlib", "snappy", 0 },
{ "bluestore_compression_mode", "force", 0 },
{ "bluestore_default_buffered_write", "false", 0 },
{ 0 },
};
do_matrix(m, std::bind(&StoreTest::doSyntheticTest, this, _1, _2, _3, _4));
}
TEST_P(StoreTestSpecificAUSize, SyntheticMatrixNoCsum)
{
if (string(GetParam()) != "bluestore")
return;
const char *m[][10] = {
{ "bluestore_min_alloc_size", "4096", "65536", 0 }, // to be the first!
{ "max_write", "65536", 0 },
{ "max_size", "1048576", 0 },
{ "alignment", "512", 0 },
{ "bluestore_max_blob_size", "262144", 0 },
{ "bluestore_compression_mode", "force", "none", 0},
{ "bluestore_csum_type", "none", 0},
{ "bluestore_default_buffered_read", "true", "false", 0 },
{ "bluestore_default_buffered_write", "true", 0 },
{ "bluestore_sync_submit_transaction", "true", "false", 0 },
{ 0 },
};
do_matrix(m, std::bind(&StoreTest::doSyntheticTest, this, _1, _2, _3, _4));
}
TEST_P(StoreTestSpecificAUSize, SyntheticMatrixPreferDeferred)
{
if (string(GetParam()) != "bluestore")
return;
const char *m[][10] = {
{ "bluestore_min_alloc_size", "4096", "65536", 0 }, // to be the first!
{ "max_write", "65536", 0 },
{ "max_size", "1048576", 0 },
{ "alignment", "512", 0 },
{ "bluestore_max_blob_size", "262144", 0 },
{ "bluestore_compression_mode", "force", "none", 0},
{ "bluestore_prefer_deferred_size", "32768", "0", 0},
{ 0 },
};
do_matrix(m, std::bind(&StoreTest::doSyntheticTest, this, _1, _2, _3, _4));
}
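// AttrSynthetic: a smaller synthetic run (500 seed objects, 1000 ops) weighted
// toward attribute operations -- setattrs, rmattr, getattr(s) -- plus occasional
// clone, stash, stat and scan.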
TEST_P(StoreTest, AttrSynthetic)
{
MixedGenerator gen(447);
gen_type rng(time(NULL));
coll_t cid(spg_t(pg_t(0,447),shard_id_t::NO_SHARD));
SyntheticWorkloadState test_obj(store.get(), &gen, &rng, cid, 40*1024, 4*1024, 0);
test_obj.init();
for (int i = 0; i < 500; ++i) {
if (!(i % 10)) cerr << "seeding object " << i << std::endl;
test_obj.touch();
}
for (int i = 0; i < 1000; ++i) {
if (!(i % 100)) {
cerr << "Op " << i << std::endl;
test_obj.print_internal_state();
}
boost::uniform_int<> true_false(0, 99);
int val = true_false(rng);
if (val > 97) {
test_obj.scan();
} else if (val > 93) {
test_obj.stat();
} else if (val > 75) {
test_obj.rmattr();
} else if (val > 47) {
test_obj.setattrs();
} else if (val > 45) {
test_obj.clone();
} else if (val > 37) {
test_obj.stash();
} else if (val > 30) {
test_obj.getattrs();
} else {
test_obj.getattr();
}
}
test_obj.wait_for_done();
test_obj.shutdown();
}
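// HashCollisionTest: 10 namespaces x 1000 long-named objects, all created with
// hash 0 so every object collides on the same hash value; paged listing must
// still be sorted, free of duplicates, and complete.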
TEST_P(StoreTest, HashCollisionTest)
{
int64_t poolid = 11;
coll_t cid(spg_t(pg_t(0,poolid),shard_id_t::NO_SHARD));
int r;
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
string base = "";
for (int i = 0; i < 100; ++i) base.append("aaaaa");
set<ghobject_t> created;
for (int n = 0; n < 10; ++n) {
char nbuf[100];
sprintf(nbuf, "n%d", n);
for (int i = 0; i < 1000; ++i) {
char buf[100];
sprintf(buf, "%d", i);
if (!(i % 100)) {
cerr << "Object n" << n << " "<< i << std::endl;
}
ghobject_t hoid(hobject_t(string(buf) + base, string(), CEPH_NOSNAP, 0, poolid, string(nbuf)));
{
ObjectStore::Transaction t;
t.touch(cid, hoid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
created.insert(hoid);
}
}
vector<ghobject_t> objects;
r = store->collection_list(ch, ghobject_t(), ghobject_t::get_max(), INT_MAX, &objects, 0);
ASSERT_EQ(r, 0);
set<ghobject_t> listed(objects.begin(), objects.end());
cerr << "listed.size() is " << listed.size() << " and created.size() is " << created.size() << std::endl;
ASSERT_TRUE(listed.size() == created.size());
objects.clear();
listed.clear();
ghobject_t current, next;
while (1) {
r = store->collection_list(ch, current, ghobject_t::get_max(), 60,
&objects, &next);
ASSERT_EQ(r, 0);
ASSERT_TRUE(sorted(objects));
for (vector<ghobject_t>::iterator i = objects.begin();
i != objects.end();
++i) {
if (listed.count(*i))
cerr << *i << " repeated" << std::endl;
listed.insert(*i);
}
if (objects.size() < 50) {
ASSERT_TRUE(next.is_max());
break;
}
objects.clear();
current = next;
}
cerr << "listed.size() is " << listed.size() << std::endl;
ASSERT_TRUE(listed.size() == created.size());
for (set<ghobject_t>::iterator i = listed.begin();
i != listed.end();
++i) {
ASSERT_TRUE(created.count(*i));
}
for (set<ghobject_t>::iterator i = created.begin();
i != created.end();
++i) {
ObjectStore::Transaction t;
t.remove(cid, *i);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
ObjectStore::Transaction t;
t.remove_collection(cid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
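// ScrubTest: 1000 objects plus three generations of the same hobject_t in a sharded
// collection; paged listing resumes from next.get_boundary() and must return every
// object exactly once.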
TEST_P(StoreTest, ScrubTest)
{
int64_t poolid = 111;
coll_t cid(spg_t(pg_t(0, poolid),shard_id_t(1)));
int r;
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
string base = "aaaaa";
set<ghobject_t> created;
for (int i = 0; i < 1000; ++i) {
char buf[100];
sprintf(buf, "%d", i);
if (!(i % 5)) {
cerr << "Object " << i << std::endl;
}
ghobject_t hoid(hobject_t(string(buf) + base, string(), CEPH_NOSNAP, i,
poolid, ""),
ghobject_t::NO_GEN, shard_id_t(1));
{
ObjectStore::Transaction t;
t.touch(cid, hoid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
created.insert(hoid);
}
// Add same hobject_t but different generation
{
ghobject_t hoid1(hobject_t("same-object", string(), CEPH_NOSNAP, 0, poolid, ""),
ghobject_t::NO_GEN, shard_id_t(1));
ghobject_t hoid2(hobject_t("same-object", string(), CEPH_NOSNAP, 0, poolid, ""), (gen_t)1, shard_id_t(1));
ghobject_t hoid3(hobject_t("same-object", string(), CEPH_NOSNAP, 0, poolid, ""), (gen_t)2, shard_id_t(1));
ObjectStore::Transaction t;
t.touch(cid, hoid1);
t.touch(cid, hoid2);
t.touch(cid, hoid3);
r = queue_transaction(store, ch, std::move(t));
created.insert(hoid1);
created.insert(hoid2);
created.insert(hoid3);
ASSERT_EQ(r, 0);
}
vector<ghobject_t> objects;
r = store->collection_list(ch, ghobject_t(), ghobject_t::get_max(),
INT_MAX, &objects, 0);
ASSERT_EQ(r, 0);
set<ghobject_t> listed(objects.begin(), objects.end());
cerr << "listed.size() is " << listed.size() << " and created.size() is " << created.size() << std::endl;
ASSERT_TRUE(listed.size() == created.size());
objects.clear();
listed.clear();
ghobject_t current, next;
while (1) {
r = store->collection_list(ch, current, ghobject_t::get_max(), 60,
&objects, &next);
ASSERT_EQ(r, 0);
ASSERT_TRUE(sorted(objects));
for (vector<ghobject_t>::iterator i = objects.begin();
i != objects.end(); ++i) {
if (listed.count(*i))
cerr << *i << " repeated" << std::endl;
listed.insert(*i);
}
if (objects.size() < 50) {
ASSERT_TRUE(next.is_max());
break;
}
objects.clear();
current = next.get_boundary();
}
cerr << "listed.size() is " << listed.size() << std::endl;
ASSERT_TRUE(listed.size() == created.size());
for (set<ghobject_t>::iterator i = listed.begin();
i != listed.end();
++i) {
ASSERT_TRUE(created.count(*i));
}
for (set<ghobject_t>::iterator i = created.begin();
i != created.end();
++i) {
ObjectStore::Transaction t;
t.remove(cid, *i);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
ObjectStore::Transaction t;
t.remove_collection(cid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
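// OMapTest: end-to-end omap coverage: omap_setkeys/omap_get round trips, key-by-key
// removal with omap_rmkeys, omap_setheader, omap_rmkeyrange over the half-open
// range ["3", "7"), and a final omap_clear.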
TEST_P(StoreTest, OMapTest)
{
coll_t cid;
ghobject_t hoid(hobject_t("tesomap", "", CEPH_NOSNAP, 0, 0, ""));
auto ch = store->create_new_collection(cid);
int r;
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
map<string, bufferlist> attrs;
{
ObjectStore::Transaction t;
t.touch(cid, hoid);
t.omap_clear(cid, hoid);
map<string, bufferlist> start_set;
t.omap_setkeys(cid, hoid, start_set);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
for (int i = 0; i < 100; i++) {
if (!(i%5)) {
std::cout << "On iteration " << i << std::endl;
}
ObjectStore::Transaction t;
bufferlist bl;
map<string, bufferlist> cur_attrs;
r = store->omap_get(ch, hoid, &bl, &cur_attrs);
ASSERT_EQ(r, 0);
for (map<string, bufferlist>::iterator j = attrs.begin();
j != attrs.end();
++j) {
bool correct = cur_attrs.count(j->first) && string(cur_attrs[j->first].c_str()) == string(j->second.c_str());
if (!correct) {
std::cout << j->first << " is present in cur_attrs " << cur_attrs.count(j->first) << " times " << std::endl;
if (cur_attrs.count(j->first) > 0) {
std::cout << j->second.c_str() << " : " << cur_attrs[j->first].c_str() << std::endl;
}
}
ASSERT_EQ(correct, true);
}
ASSERT_EQ(attrs.size(), cur_attrs.size());
char buf[100];
snprintf(buf, sizeof(buf), "%d", i);
bl.clear();
bufferptr bp(buf, strlen(buf) + 1);
bl.append(bp);
map<string, bufferlist> to_add;
to_add.insert(pair<string, bufferlist>("key-" + string(buf), bl));
attrs.insert(pair<string, bufferlist>("key-" + string(buf), bl));
t.omap_setkeys(cid, hoid, to_add);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
int i = 0;
while (attrs.size()) {
if (!(i%5)) {
std::cout << "removal: On iteration " << i << std::endl;
}
ObjectStore::Transaction t;
bufferlist bl;
map<string, bufferlist> cur_attrs;
r = store->omap_get(ch, hoid, &bl, &cur_attrs);
ASSERT_EQ(r, 0);
for (map<string, bufferlist>::iterator j = attrs.begin();
j != attrs.end();
++j) {
bool correct = cur_attrs.count(j->first) && string(cur_attrs[j->first].c_str()) == string(j->second.c_str());
if (!correct) {
std::cout << j->first << " is present in cur_attrs " << cur_attrs.count(j->first) << " times " << std::endl;
if (cur_attrs.count(j->first) > 0) {
std::cout << j->second.c_str() << " : " << cur_attrs[j->first].c_str() << std::endl;
}
}
ASSERT_EQ(correct, true);
}
string to_remove = attrs.begin()->first;
set<string> keys_to_remove;
keys_to_remove.insert(to_remove);
t.omap_rmkeys(cid, hoid, keys_to_remove);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
attrs.erase(to_remove);
++i;
}
{
bufferlist bl1;
bl1.append("omap_header");
ObjectStore::Transaction t;
t.omap_setheader(cid, hoid, bl1);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
t = ObjectStore::Transaction();
bufferlist bl2;
bl2.append("value");
map<string, bufferlist> to_add;
to_add.insert(pair<string, bufferlist>("key", bl2));
t.omap_setkeys(cid, hoid, to_add);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
bufferlist bl3;
map<string, bufferlist> cur_attrs;
r = store->omap_get(ch, hoid, &bl3, &cur_attrs);
ASSERT_EQ(r, 0);
ASSERT_EQ(cur_attrs.size(), size_t(1));
ASSERT_TRUE(bl_eq(bl1, bl3));
set<string> keys;
r = store->omap_get_keys(ch, hoid, &keys);
ASSERT_EQ(r, 0);
ASSERT_EQ(keys.size(), size_t(1));
}
// test omap_clear, omap_rmkey_range
{
{
map<string,bufferlist> to_set;
for (int n=0; n<10; ++n) {
to_set[stringify(n)].append("foo");
}
bufferlist h;
h.append("header");
ObjectStore::Transaction t;
t.remove(cid, hoid);
t.touch(cid, hoid);
t.omap_setheader(cid, hoid, h);
t.omap_setkeys(cid, hoid, to_set);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
t.omap_rmkeyrange(cid, hoid, "3", "7");
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
bufferlist hdr;
map<string,bufferlist> m;
store->omap_get(ch, hoid, &hdr, &m);
ASSERT_EQ(6u, hdr.length());
ASSERT_TRUE(m.count("2"));
ASSERT_TRUE(!m.count("3"));
ASSERT_TRUE(!m.count("6"));
ASSERT_TRUE(m.count("7"));
ASSERT_TRUE(m.count("8"));
//cout << m << std::endl;
ASSERT_EQ(6u, m.size());
}
{
ObjectStore::Transaction t;
t.omap_clear(cid, hoid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
bufferlist hdr;
map<string,bufferlist> m;
store->omap_get(ch, hoid, &hdr, &m);
ASSERT_EQ(0u, hdr.length());
ASSERT_EQ(0u, m.size());
}
}
ObjectStore::Transaction t;
t.remove(cid, hoid);
t.remove_collection(cid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
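// OMapIterator: the same key set, but read back through get_omap_iterator();
// verifies a full iteration after every insert plus lower_bound()/upper_bound()
// positioning around "key-5". Note the iterator is reset before each
// queue_transaction to avoid a FileStore deadlock.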
TEST_P(StoreTest, OMapIterator)
{
coll_t cid;
ghobject_t hoid(hobject_t("tesomap", "", CEPH_NOSNAP, 0, 0, ""));
int count = 0;
auto ch = store->create_new_collection(cid);
int r;
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
map<string, bufferlist> attrs;
{
ObjectStore::Transaction t;
t.touch(cid, hoid);
t.omap_clear(cid, hoid);
map<string, bufferlist> start_set;
t.omap_setkeys(cid, hoid, start_set);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
ObjectMap::ObjectMapIterator iter;
bool correct;
//basic iteration
for (int i = 0; i < 100; i++) {
if (!(i%5)) {
std::cout << "On iteration " << i << std::endl;
}
bufferlist bl;
// FileStore may deadlock two active iterators over the same data
iter = ObjectMap::ObjectMapIterator();
iter = store->get_omap_iterator(ch, hoid);
for (iter->seek_to_first(), count=0; iter->valid(); iter->next(), count++) {
string key = iter->key();
bufferlist value = iter->value();
correct = attrs.count(key) && (string(value.c_str()) == string(attrs[key].c_str()));
if (!correct) {
if (attrs.count(key) > 0) {
std::cout << "key " << key << "in omap , " << value.c_str() << " : " << attrs[key].c_str() << std::endl;
} else
std::cout << "key " << key << "should not exists in omap" << std::endl;
}
ASSERT_EQ(correct, true);
}
ASSERT_EQ((int)attrs.size(), count);
// FileStore may deadlock an active iterator vs queue_transaction
iter = ObjectMap::ObjectMapIterator();
char buf[100];
snprintf(buf, sizeof(buf), "%d", i);
bl.clear();
bufferptr bp(buf, strlen(buf) + 1);
bl.append(bp);
map<string, bufferlist> to_add;
to_add.insert(pair<string, bufferlist>("key-" + string(buf), bl));
attrs.insert(pair<string, bufferlist>("key-" + string(buf), bl));
ObjectStore::Transaction t;
t.omap_setkeys(cid, hoid, to_add);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
iter = store->get_omap_iterator(ch, hoid);
//lower bound
string bound_key = "key-5";
iter->lower_bound(bound_key);
correct = bound_key <= iter->key();
if (!correct) {
std::cout << "lower bound, bound key is " << bound_key << " < iter key is " << iter->key() << std::endl;
}
ASSERT_EQ(correct, true);
//upper bound
iter->upper_bound(bound_key);
correct = iter->key() > bound_key;
if (!correct) {
std::cout << "upper bound, bound key is " << bound_key << " >= iter key is " << iter->key() << std::endl;
}
ASSERT_EQ(correct, true);
// FileStore may deadlock an active iterator vs queue_transaction
iter = ObjectMap::ObjectMapIterator();
{
ObjectStore::Transaction t;
t.remove(cid, hoid);
t.remove_collection(cid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
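// XattrTest: mixes small (10-byte) and big (10000-byte) xattrs on one object,
// overwrites some of them, removes "attr2", and checks getattrs()/getattr()
// match the expected map (a removed attr reads back as -ENODATA).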
TEST_P(StoreTest, XattrTest)
{
coll_t cid;
ghobject_t hoid(hobject_t("tesomap", "", CEPH_NOSNAP, 0, 0, ""));
bufferlist big;
for (unsigned i = 0; i < 10000; ++i) {
big.append('\0');
}
bufferlist small;
for (unsigned i = 0; i < 10; ++i) {
small.append('\0');
}
int r;
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
t.touch(cid, hoid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
map<string, bufferlist> attrs;
{
ObjectStore::Transaction t;
t.setattr(cid, hoid, "attr1", small);
attrs["attr1"] = small;
t.setattr(cid, hoid, "attr2", big);
attrs["attr2"] = big;
t.setattr(cid, hoid, "attr3", small);
attrs["attr3"] = small;
t.setattr(cid, hoid, "attr1", small);
attrs["attr1"] = small;
t.setattr(cid, hoid, "attr4", big);
attrs["attr4"] = big;
t.setattr(cid, hoid, "attr3", big);
attrs["attr3"] = big;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
map<string, bufferptr> aset;
store->getattrs(ch, hoid, aset);
ASSERT_EQ(aset.size(), attrs.size());
for (map<string, bufferptr>::iterator i = aset.begin();
i != aset.end();
++i) {
bufferlist bl;
bl.push_back(i->second);
ASSERT_TRUE(attrs[i->first] == bl);
}
{
ObjectStore::Transaction t;
t.rmattr(cid, hoid, "attr2");
attrs.erase("attr2");
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
aset.clear();
store->getattrs(ch, hoid, aset);
ASSERT_EQ(aset.size(), attrs.size());
for (map<string, bufferptr>::iterator i = aset.begin();
i != aset.end();
++i) {
bufferlist bl;
bl.push_back(i->second);
ASSERT_TRUE(attrs[i->first] == bl);
}
bufferptr bp;
r = store->getattr(ch, hoid, "attr2", bp);
ASSERT_EQ(r, -ENODATA);
r = store->getattr(ch, hoid, "attr3", bp);
ASSERT_EQ(r, 0);
bufferlist bl2;
bl2.push_back(bp);
ASSERT_TRUE(bl2 == attrs["attr3"]);
ObjectStore::Transaction t;
t.remove(cid, hoid);
t.remove_collection(cid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
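// colsplittest: helper for the ColSplitTest* cases below. It fills the parent
// collection with num_objects (optionally plus a clone of each), then
// split_collection() moves every object whose hash has bit (1<<common_suffix_size)
// set into the child collection; both halves are then listed and removed in batches.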
void colsplittest(
ObjectStore *store,
unsigned num_objects,
unsigned common_suffix_size,
bool clones
)
{
coll_t cid(spg_t(pg_t(0,52),shard_id_t::NO_SHARD));
coll_t tid(spg_t(pg_t(1<<common_suffix_size,52),shard_id_t::NO_SHARD));
auto ch = store->create_new_collection(cid);
auto tch = store->create_new_collection(tid);
int r = 0;
{
ObjectStore::Transaction t;
t.create_collection(cid, common_suffix_size);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
bufferlist small;
small.append("small");
{
ObjectStore::Transaction t;
for (uint32_t i = 0; i < (2 - (int)clones)*num_objects; ++i) {
stringstream objname;
objname << "obj" << i;
ghobject_t a(hobject_t(
objname.str(),
"",
CEPH_NOSNAP,
i<<common_suffix_size,
52, ""));
t.write(cid, a, 0, small.length(), small,
CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
if (clones) {
objname << "-clone";
ghobject_t b(hobject_t(
objname.str(),
"",
CEPH_NOSNAP,
i<<common_suffix_size,
52, ""));
t.clone(cid, a, b);
}
if (i % 100) {
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
t = ObjectStore::Transaction();
}
}
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
t.create_collection(tid, common_suffix_size + 1);
t.split_collection(cid, common_suffix_size+1, 1<<common_suffix_size, tid);
r = queue_transaction(store, tch, std::move(t));
ASSERT_EQ(r, 0);
}
tch->flush();
ObjectStore::Transaction t;
vector<ghobject_t> objects;
r = store->collection_list(ch, ghobject_t(), ghobject_t::get_max(),
INT_MAX, &objects, 0);
ASSERT_EQ(r, 0);
ASSERT_EQ(objects.size(), num_objects);
unsigned size = 0;
for (vector<ghobject_t>::iterator i = objects.begin();
i != objects.end();
++i) {
ASSERT_EQ(!!(i->hobj.get_hash() & (1<<common_suffix_size)), 0u);
t.remove(cid, *i);
if (++size > 100) {
size = 0;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
t = ObjectStore::Transaction();
// test environment may have a low open file limit
ch->flush();
}
}
t.remove_collection(cid);
r = queue_transaction(store, ch, std::move(t));
t = ObjectStore::Transaction();
objects.clear();
r = store->collection_list(tch, ghobject_t(), ghobject_t::get_max(),
INT_MAX, &objects, 0);
ASSERT_EQ(r, 0);
ASSERT_EQ(objects.size(), num_objects);
for (vector<ghobject_t>::iterator i = objects.begin();
i != objects.end();
++i) {
ASSERT_EQ(!(i->hobj.get_hash() & (1<<common_suffix_size)), 0u);
t.remove(tid, *i);
if (++size > 100) {
size = 0;
r = queue_transaction(store, tch, std::move(t));
ASSERT_EQ(r, 0);
t = ObjectStore::Transaction();
// test environment may have a low open file limit
tch->flush();
}
}
t.remove_collection(tid);
r = queue_transaction(store, tch, std::move(t));
ASSERT_EQ(r, 0);
}
TEST_P(StoreTest, ColSplitTest1)
{
colsplittest(store.get(), 10000, 11, false);
}
TEST_P(StoreTest, ColSplitTest1Clones)
{
colsplittest(store.get(), 10000, 11, true);
}
TEST_P(StoreTest, ColSplitTest2)
{
colsplittest(store.get(), 100, 7, false);
}
TEST_P(StoreTest, ColSplitTest2Clones)
{
colsplittest(store.get(), 100, 7, true);
}
#if 0
TEST_P(StoreTest, ColSplitTest3)
{
colsplittest(store.get(), 100000, 25, false);
}
#endif
/**
* This test adds two different groups of objects,
* each with 1 common prefix and 1 different prefix.
* We then remove half in order to verify that the
* merging correctly stops at the common prefix subdir.
* See bug #5273. */
TEST_P(StoreTest, TwoHash)
{
coll_t cid;
int r;
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
std::cout << "Making objects" << std::endl;
for (int i = 0; i < 360; ++i) {
ObjectStore::Transaction t;
ghobject_t o;
o.hobj.pool = -1;
if (i < 8) {
o.hobj.set_hash((i << 16) | 0xA1);
t.touch(cid, o);
}
o.hobj.set_hash((i << 16) | 0xB1);
t.touch(cid, o);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
std::cout << "Removing half" << std::endl;
for (int i = 1; i < 8; ++i) {
ObjectStore::Transaction t;
ghobject_t o;
o.hobj.pool = -1;
o.hobj.set_hash((i << 16) | 0xA1);
t.remove(cid, o);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
std::cout << "Checking" << std::endl;
for (int i = 1; i < 8; ++i) {
ObjectStore::Transaction t;
ghobject_t o;
o.hobj.set_hash((i << 16) | 0xA1);
o.hobj.pool = -1;
bool exists = store->exists(ch, o);
ASSERT_EQ(exists, false);
}
{
ghobject_t o;
o.hobj.set_hash(0xA1);
o.hobj.pool = -1;
bool exists = store->exists(ch, o);
ASSERT_EQ(exists, true);
}
std::cout << "Cleanup" << std::endl;
for (int i = 0; i < 360; ++i) {
ObjectStore::Transaction t;
ghobject_t o;
o.hobj.set_hash((i << 16) | 0xA1);
o.hobj.pool = -1;
t.remove(cid, o);
o.hobj.set_hash((i << 16) | 0xB1);
t.remove(cid, o);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
ObjectStore::Transaction t;
t.remove_collection(cid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
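// Rename: collection_move_rename() within one collection. The same transaction
// that renames src->dst also rewrites src, so afterwards both objects exist and
// dst holds the old src contents; a second rename then replaces dst entirely.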
TEST_P(StoreTest, Rename)
{
coll_t cid(spg_t(pg_t(0, 2122),shard_id_t::NO_SHARD));
ghobject_t srcoid(hobject_t("src_oid", "", CEPH_NOSNAP, 0, 0, ""));
ghobject_t dstoid(hobject_t("dest_oid", "", CEPH_NOSNAP, 0, 0, ""));
bufferlist a, b;
a.append("foo");
b.append("bar");
auto ch = store->create_new_collection(cid);
int r;
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
t.write(cid, srcoid, 0, a.length(), a);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
ASSERT_TRUE(store->exists(ch, srcoid));
{
ObjectStore::Transaction t;
t.collection_move_rename(cid, srcoid, cid, dstoid);
t.write(cid, srcoid, 0, b.length(), b);
t.setattr(cid, srcoid, "attr", b);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
ASSERT_TRUE(store->exists(ch, srcoid));
ASSERT_TRUE(store->exists(ch, dstoid));
{
bufferlist bl;
store->read(ch, srcoid, 0, 3, bl);
ASSERT_TRUE(bl_eq(b, bl));
store->read(ch, dstoid, 0, 3, bl);
ASSERT_TRUE(bl_eq(a, bl));
}
{
ObjectStore::Transaction t;
t.remove(cid, dstoid);
t.collection_move_rename(cid, srcoid, cid, dstoid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
ASSERT_TRUE(store->exists(ch, dstoid));
ASSERT_FALSE(store->exists(ch, srcoid));
{
bufferlist bl;
store->read(ch, dstoid, 0, 3, bl);
ASSERT_TRUE(bl_eq(b, bl));
}
{
ObjectStore::Transaction t;
t.remove(cid, dstoid);
t.remove_collection(cid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
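// MoveRename: stages a temp object carrying data, an xattr and an omap key, then
// removes the destination and moves the temp object onto it in one transaction,
// verifying all three pieces of state follow the rename.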
TEST_P(StoreTest, MoveRename)
{
coll_t cid(spg_t(pg_t(0, 212),shard_id_t::NO_SHARD));
ghobject_t temp_oid(hobject_t("tmp_oid", "", CEPH_NOSNAP, 0, 0, ""));
ghobject_t oid(hobject_t("dest_oid", "", CEPH_NOSNAP, 0, 0, ""));
auto ch = store->create_new_collection(cid);
int r;
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
t.touch(cid, oid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
ASSERT_TRUE(store->exists(ch, oid));
bufferlist data, attr;
map<string, bufferlist> omap;
data.append("data payload");
attr.append("attr value");
omap["omap_key"].append("omap value");
{
ObjectStore::Transaction t;
t.touch(cid, temp_oid);
t.write(cid, temp_oid, 0, data.length(), data);
t.setattr(cid, temp_oid, "attr", attr);
t.omap_setkeys(cid, temp_oid, omap);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
ASSERT_TRUE(store->exists(ch, temp_oid));
{
ObjectStore::Transaction t;
t.remove(cid, oid);
t.collection_move_rename(cid, temp_oid, cid, oid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
ASSERT_TRUE(store->exists(ch, oid));
ASSERT_FALSE(store->exists(ch, temp_oid));
{
bufferlist newdata;
r = store->read(ch, oid, 0, 1000, newdata);
ASSERT_GE(r, 0);
ASSERT_TRUE(bl_eq(data, newdata));
bufferlist newattr;
r = store->getattr(ch, oid, "attr", newattr);
ASSERT_EQ(r, 0);
ASSERT_TRUE(bl_eq(attr, newattr));
set<string> keys;
keys.insert("omap_key");
map<string, bufferlist> newomap;
r = store->omap_get_values(ch, oid, keys, &newomap);
ASSERT_GE(r, 0);
ASSERT_EQ(1u, newomap.size());
ASSERT_TRUE(newomap.count("omap_key"));
ASSERT_TRUE(bl_eq(omap["omap_key"], newomap["omap_key"]));
}
{
ObjectStore::Transaction t;
t.remove(cid, oid);
t.remove_collection(cid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
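// BigRGWObjectName: an RGW-style object with an extremely long name plus several
// generations of the same head object; touch/rename/remove and listing must all
// still work.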
TEST_P(StoreTest, BigRGWObjectName)
{
coll_t cid(spg_t(pg_t(0,12),shard_id_t::NO_SHARD));
ghobject_t oid(
hobject_t(
"default.4106.50_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
"",
CEPH_NOSNAP,
0x81920472,
12,
""),
15,
shard_id_t::NO_SHARD);
ghobject_t oid2(oid);
oid2.generation = 17;
ghobject_t oidhead(oid);
oidhead.generation = ghobject_t::NO_GEN;
auto ch = store->create_new_collection(cid);
int r;
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
t.touch(cid, oidhead);
t.collection_move_rename(cid, oidhead, cid, oid);
t.touch(cid, oidhead);
t.collection_move_rename(cid, oidhead, cid, oid2);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
t.remove(cid, oid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
vector<ghobject_t> objects;
r = store->collection_list(ch, ghobject_t(), ghobject_t::get_max(),
INT_MAX, &objects, 0);
ASSERT_EQ(r, 0);
ASSERT_EQ(objects.size(), 1u);
ASSERT_EQ(objects[0], oid2);
}
ASSERT_FALSE(store->exists(ch, oid));
{
ObjectStore::Transaction t;
t.remove(cid, oid2);
t.remove_collection(cid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
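// SetAllocHint: set_alloc_hint() on an existing object, and again after the object
// has been removed -- both transactions are expected to succeed.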
TEST_P(StoreTest, SetAllocHint)
{
coll_t cid;
ghobject_t hoid(hobject_t("test_hint", "", CEPH_NOSNAP, 0, 0, ""));
auto ch = store->create_new_collection(cid);
int r;
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
t.touch(cid, hoid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
t.set_alloc_hint(cid, hoid, 4*1024*1024, 1024*4, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
t.remove(cid, hoid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
t.set_alloc_hint(cid, hoid, 4*1024*1024, 1024*4, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
t.remove_collection(cid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
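// TryMoveRename: try_rename() must succeed as a no-op while the source does not
// exist yet, and actually rename once the source has been touched; the stat()
// calls at the end confirm only the destination remains.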
TEST_P(StoreTest, TryMoveRename)
{
coll_t cid;
ghobject_t hoid(hobject_t("test_hint", "", CEPH_NOSNAP, 0, -1, ""));
ghobject_t hoid2(hobject_t("test_hint2", "", CEPH_NOSNAP, 0, -1, ""));
auto ch = store->create_new_collection(cid);
int r;
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
t.try_rename(cid, hoid, hoid2);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
t.touch(cid, hoid);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
t.try_rename(cid, hoid, hoid2);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
struct stat st;
ASSERT_EQ(store->stat(ch, hoid, &st), -ENOENT);
ASSERT_EQ(store->stat(ch, hoid2, &st), 0);
}
#if defined(WITH_BLUESTORE)
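// BluestoreOnOffCSumTest: flips bluestore_csum_type between crc32c and none across
// writes and reads of the same object, covering reads of data written under the
// opposite checksum setting and mixed (protected/unprotected) overwrites of one blob.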
TEST_P(StoreTest, BluestoreOnOffCSumTest)
{
if (string(GetParam()) != "bluestore")
return;
SetVal(g_conf, "bluestore_csum_type", "crc32c");
g_conf->apply_changes(NULL);
int r;
coll_t cid;
ghobject_t hoid(hobject_t(sobject_t("Object 1", CEPH_NOSNAP)));
{
auto ch = store->open_collection(cid);
ASSERT_FALSE(ch);
}
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
cerr << "Creating collection " << cid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
//write with csum enabled followed by read with csum disabled
size_t block_size = 64*1024;
ObjectStore::Transaction t;
bufferlist bl, orig;
bl.append(std::string(block_size, 'a'));
orig = bl;
t.remove(cid, hoid);
t.set_alloc_hint(cid, hoid, 4*1024*1024, 1024*8, 0);
t.write(cid, hoid, 0, bl.length(), bl);
cerr << "Remove then create" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
SetVal(g_conf, "bluestore_csum_type", "none");
g_conf->apply_changes(NULL);
bufferlist in;
r = store->read(ch, hoid, 0, block_size, in);
ASSERT_EQ((int)block_size, r);
ASSERT_TRUE(bl_eq(orig, in));
}
{
//write with csum disabled followed by read with csum enabled
size_t block_size = 64*1024;
ObjectStore::Transaction t;
bufferlist bl, orig;
bl.append(std::string(block_size, 'a'));
orig = bl;
t.remove(cid, hoid);
t.set_alloc_hint(cid, hoid, 4*1024*1024, 1024*8, 0);
t.write(cid, hoid, 0, bl.length(), bl);
cerr << "Remove then create" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
SetVal(g_conf, "bluestore_csum_type", "crc32c");
g_conf->apply_changes(NULL);
bufferlist in;
r = store->read(ch, hoid, 0, block_size, in);
ASSERT_EQ((int)block_size, r);
ASSERT_TRUE(bl_eq(orig, in));
}
{
//'mixed' non-overlapping writes to the same blob
ObjectStore::Transaction t;
bufferlist bl, orig;
size_t block_size = 8000;
bl.append(std::string(block_size, 'a'));
orig = bl;
t.remove(cid, hoid);
t.write(cid, hoid, 0, bl.length(), bl);
cerr << "Remove then create" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
SetVal(g_conf, "bluestore_csum_type", "none");
g_conf->apply_changes(NULL);
ObjectStore::Transaction t2;
t2.write(cid, hoid, block_size*2, bl.length(), bl);
cerr << "Append 'unprotected'" << std::endl;
r = queue_transaction(store, ch, std::move(t2));
ASSERT_EQ(r, 0);
bufferlist in;
r = store->read(ch, hoid, 0, block_size, in);
ASSERT_EQ((int)block_size, r);
ASSERT_TRUE(bl_eq(orig, in));
in.clear();
r = store->read(ch, hoid, block_size*2, block_size, in);
ASSERT_EQ((int)block_size, r);
ASSERT_TRUE(bl_eq(orig, in));
SetVal(g_conf, "bluestore_csum_type", "crc32c");
g_conf->apply_changes(NULL);
in.clear();
r = store->read(ch, hoid, 0, block_size, in);
ASSERT_EQ((int)block_size, r);
ASSERT_TRUE(bl_eq(orig, in));
in.clear();
r = store->read(ch, hoid, block_size*2, block_size, in);
ASSERT_EQ((int)block_size, r);
ASSERT_TRUE(bl_eq(orig, in));
}
{
//partially blob overwrite under a different csum enablement mode
ObjectStore::Transaction t;
bufferlist bl, orig, orig2;
size_t block_size0 = 0x10000;
size_t block_size = 9000;
size_t block_size2 = 5000;
bl.append(std::string(block_size0, 'a'));
t.remove(cid, hoid);
t.set_alloc_hint(cid, hoid, 4*1024*1024, 1024*8, 0);
t.write(cid, hoid, 0, bl.length(), bl);
cerr << "Remove then create" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
SetVal(g_conf, "bluestore_csum_type", "none");
g_conf->apply_changes(NULL);
ObjectStore::Transaction t2;
bl.clear();
bl.append(std::string(block_size, 'b'));
t2.write(cid, hoid, 0, bl.length(), bl);
t2.write(cid, hoid, block_size0, bl.length(), bl);
cerr << "Overwrite with unprotected data" << std::endl;
r = queue_transaction(store, ch, std::move(t2));
ASSERT_EQ(r, 0);
orig = bl;
orig2 = bl;
orig.append( std::string(block_size0 - block_size, 'a'));
bufferlist in;
r = store->read(ch, hoid, 0, block_size0, in);
ASSERT_EQ((int)block_size0, r);
ASSERT_TRUE(bl_eq(orig, in));
r = store->read(ch, hoid, block_size0, block_size, in);
ASSERT_EQ((int)block_size, r);
ASSERT_TRUE(bl_eq(orig2, in));
SetVal(g_conf, "bluestore_csum_type", "crc32c");
g_conf->apply_changes(NULL);
ObjectStore::Transaction t3;
bl.clear();
bl.append(std::string(block_size2, 'c'));
t3.write(cid, hoid, block_size0, bl.length(), bl);
cerr << "Overwrite with protected data" << std::endl;
r = queue_transaction(store, ch, std::move(t3));
ASSERT_EQ(r, 0);
in.clear();
orig = bl;
orig.append( std::string(block_size - block_size2, 'b'));
r = store->read(ch, hoid, block_size0, block_size, in);
ASSERT_EQ((int)block_size, r);
ASSERT_TRUE(bl_eq(orig, in));
}
{
ObjectStore::Transaction t;
t.remove(cid, hoid);
t.remove_collection(cid);
cerr << "Cleaning" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
#endif
INSTANTIATE_TEST_CASE_P(
ObjectStore,
StoreTest,
::testing::Values(
"memstore",
"filestore",
#if defined(WITH_BLUESTORE)
"bluestore",
#endif
"kstore"));
// Note: instantiate all stores to preserve store numbering order only
INSTANTIATE_TEST_CASE_P(
ObjectStore,
StoreTestSpecificAUSize,
::testing::Values(
"memstore",
"filestore",
#if defined(WITH_BLUESTORE)
"bluestore",
#endif
"kstore"));
#else
// Google Test may not support value-parameterized tests with some
// compilers. If we use conditional compilation to compile out all
// code referring to the gtest_main library, MSVC linker will not link
// that library at all and consequently complain about missing entry
// point defined in that library (fatal error LNK1561: entry point
// must be defined). This dummy test keeps gtest_main linked in.
TEST(DummyTest, ValueParameterizedTestsAreNotSupportedOnThisPlatform) {}
#endif
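// doMany4KWritesTest: helper for the Many4KWrites* cases. It runs a write-only
// synthetic workload against a single object and then checks statfs(): stored and
// allocated must not exceed the object size, re-running fsck for extra diagnostics
// if they do.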
void doMany4KWritesTest(boost::scoped_ptr<ObjectStore>& store,
unsigned max_objects,
unsigned max_ops,
unsigned max_object_size,
unsigned max_write_size,
unsigned write_alignment)
{
MixedGenerator gen(555);
gen_type rng(time(NULL));
coll_t cid(spg_t(pg_t(0,555), shard_id_t::NO_SHARD));
store_statfs_t res_stat;
SyntheticWorkloadState test_obj(store.get(),
&gen,
&rng,
cid,
max_object_size,
max_write_size,
write_alignment);
test_obj.init();
for (unsigned i = 0; i < max_objects; ++i) {
if (!(i % 500)) cerr << "seeding object " << i << std::endl;
test_obj.touch();
}
for (unsigned i = 0; i < max_ops; ++i) {
if (!(i % 200)) {
cerr << "Op " << i << std::endl;
test_obj.print_internal_state();
}
test_obj.write();
}
test_obj.wait_for_done();
test_obj.statfs(res_stat);
if (!(res_stat.stored <= max_object_size) ||
!(res_stat.allocated <= max_object_size)) {
// this will provide more insight on the mismatch and
// helps to avoid any races during stats collection
test_obj.fsck(false);
// retrieving stats once again and assert if still broken
test_obj.statfs(res_stat);
ASSERT_LE(res_stat.stored, max_object_size);
ASSERT_LE(res_stat.allocated, max_object_size);
}
test_obj.shutdown();
}
TEST_P(StoreTestSpecificAUSize, Many4KWritesTest)
{
if (string(GetParam()) != "bluestore")
return;
StartDeferred(0x10000);
const unsigned max_object = 4*1024*1024;
doMany4KWritesTest(store, 1, 1000, max_object, 4*1024, 0);
}
TEST_P(StoreTestSpecificAUSize, Many4KWritesNoCSumTest)
{
if (string(GetParam()) != "bluestore")
return;
StartDeferred(0x10000);
SetVal(g_conf, "bluestore_csum_type", "none");
g_ceph_context->_conf->apply_changes(NULL);
const unsigned max_object = 4*1024*1024;
doMany4KWritesTest(store, 1, 1000, max_object, 4*1024, 0 );
}
TEST_P(StoreTestSpecificAUSize, TooManyBlobsTest)
{
if (string(GetParam()) != "bluestore")
return;
StartDeferred(0x10000);
const unsigned max_object = 4*1024*1024;
doMany4KWritesTest(store, 1, 1000, max_object, 4*1024, 0);
}
#if defined(WITH_BLUESTORE)
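// get_mempool_stats: sums the bluestore_cache_onode/_other mempools; OnodeSizeTracking
// below uses it to confirm exactly one onode stays cached while a 4MB object is
// written, truncated, rewritten in varying block sizes and read back.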
void get_mempool_stats(uint64_t* total_bytes, uint64_t* total_items)
{
uint64_t onode_allocated = mempool::bluestore_cache_onode::allocated_bytes();
uint64_t other_allocated = mempool::bluestore_cache_other::allocated_bytes();
uint64_t onode_items = mempool::bluestore_cache_onode::allocated_items();
uint64_t other_items = mempool::bluestore_cache_other::allocated_items();
cout << "onode(" << onode_allocated << "/" << onode_items
<< ") other(" << other_allocated << "/" << other_items
<< ")" << std::endl;
*total_bytes = onode_allocated + other_allocated;
*total_items = onode_items;
}
TEST_P(StoreTestSpecificAUSize, OnodeSizeTracking)
{
if (string(GetParam()) != "bluestore")
return;
size_t block_size = 4096;
StartDeferred(block_size);
SetVal(g_conf, "bluestore_compression_mode", "none");
SetVal(g_conf, "bluestore_csum_type", "none");
SetVal(g_conf, "bluestore_cache_size_hdd", "400000000");
SetVal(g_conf, "bluestore_cache_size_ssd", "400000000");
g_conf->apply_changes(NULL);
int r;
coll_t cid;
ghobject_t hoid(hobject_t("test_hint", "", CEPH_NOSNAP, 0, -1, ""));
size_t obj_size = 4 * 1024 * 1024;
uint64_t total_bytes, total_bytes2;
uint64_t total_onodes;
get_mempool_stats(&total_bytes, &total_onodes);
ASSERT_EQ(total_onodes, 0u);
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
bufferlist bl, orig, orig2;
bl.append(std::string(obj_size, 'a'));
t.write(cid, hoid, 0, bl.length(), bl);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
get_mempool_stats(&total_bytes, &total_onodes);
ASSERT_NE(total_bytes, 0u);
ASSERT_EQ(total_onodes, 1u);
{
ObjectStore::Transaction t;
t.truncate(cid, hoid, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
for(size_t i = 0; i < 1; ++i) {
bufferlist bl;
bl.append(std::string(block_size * (i+1), 'a'));
for( size_t j = 0; j < obj_size; j+= bl.length()) {
ObjectStore::Transaction t;
t.write(cid, hoid, j, bl.length(), bl);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
get_mempool_stats(&total_bytes2, &total_onodes);
ASSERT_NE(total_bytes2, 0u);
ASSERT_EQ(total_onodes, 1u);
}
{
cout <<" mempool dump:\n";
JSONFormatter f(true);
f.open_object_section("transaction");
mempool::dump(&f);
f.close_section();
f.flush(cout);
cout << std::endl;
}
{
bufferlist bl;
for (size_t i = 0; i < obj_size; i += 0x1000) {
store->read(ch, hoid, i, 0x1000, bl);
}
}
get_mempool_stats(&total_bytes, &total_onodes);
ASSERT_NE(total_bytes, 0u);
ASSERT_EQ(total_onodes, 1u);
{
cout <<" mempool dump:\n";
JSONFormatter f(true);
f.open_object_section("transaction");
mempool::dump(&f);
f.close_section();
f.flush(cout);
cout << std::endl;
}
{
ObjectStore::Transaction t;
t.remove(cid, hoid);
t.remove_collection(cid);
cerr << "Cleaning" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
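// BlobReuseOnOverwrite: with a 64K max blob size, a series of overwrites, appends
// and gap fills on one object; the l_bluestore_blobs / l_bluestore_extents perf
// counters show when BlueStore reuses the existing blob versus adding extents.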
TEST_P(StoreTestSpecificAUSize, BlobReuseOnOverwrite)
{
if (string(GetParam()) != "bluestore")
return;
size_t block_size = 4096;
StartDeferred(block_size);
SetVal(g_conf, "bluestore_max_blob_size", "65536");
g_conf->apply_changes(NULL);
int r;
coll_t cid;
ghobject_t hoid(hobject_t("test_hint", "", CEPH_NOSNAP, 0, -1, ""));
const PerfCounters* logger = store->get_perf_counters();
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
bufferlist bl;
bl.append(std::string(block_size * 2, 'a'));
t.write(cid, hoid, 0, bl.length(), bl, CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
// overwrite at the beginning
ObjectStore::Transaction t;
bufferlist bl;
bl.append(std::string(block_size, 'b'));
t.write(cid, hoid, 0, bl.length(), bl, CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
// append
ObjectStore::Transaction t;
bufferlist bl;
bl.append(std::string(block_size * 2, 'c'));
t.write(cid, hoid, block_size * 2, bl.length(), bl, CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
// append with a gap
ObjectStore::Transaction t;
bufferlist bl;
bl.append(std::string(block_size * 2, 'd'));
t.write(cid, hoid, block_size * 5, bl.length(), bl, CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
// We need to issue a read to trigger cache stat update that refresh
// perf counters. additionally we need to wait some time for mempool
// thread to update stats.
sleep(1);
bufferlist bl, expected;
r = store->read(ch, hoid, 0, block_size, bl);
ASSERT_EQ(r, (int)block_size);
expected.append(string(block_size, 'b'));
ASSERT_TRUE(bl_eq(expected, bl));
ASSERT_EQ(logger->get(l_bluestore_blobs), 1u);
ASSERT_EQ(logger->get(l_bluestore_extents), 2u);
}
{
// overwrite at end
ObjectStore::Transaction t;
bufferlist bl;
bl.append(std::string(block_size * 2, 'e'));
// Currently we are unable to reuse blob when overwriting in a single step
t.write(cid, hoid, block_size * 6, bl.length(), bl, CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
// We need to issue a read to trigger cache stat update that refresh
// perf counters. additionally we need to wait some time for mempool
// thread to update stats.
sleep(1);
bufferlist bl, expected;
r = store->read(ch, hoid, 0, block_size, bl);
ASSERT_EQ(r, (int)block_size);
expected.append(string(block_size, 'b'));
ASSERT_TRUE(bl_eq(expected, bl));
ASSERT_EQ(logger->get(l_bluestore_blobs), 1u);
ASSERT_EQ(logger->get(l_bluestore_extents), 2u);
}
{
// fill the gap
ObjectStore::Transaction t;
bufferlist bl;
bl.append(std::string(block_size, 'f'));
t.write(cid, hoid, block_size * 4, bl.length(), bl, CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
// we need to wait some time for mempool
// thread to update stats to be able to check blob/extent numbers from
// perf counters.
sleep(1);
bufferlist bl, expected;
r = store->read(ch, hoid, 0, block_size, bl);
ASSERT_EQ(r, (int)block_size);
expected.append(string(block_size, 'b'));
ASSERT_TRUE(bl_eq(expected, bl));
bl.clear();
expected.clear();
r = store->read(ch, hoid, block_size, block_size, bl);
ASSERT_EQ(r, (int)block_size);
expected.append(string(block_size, 'a'));
ASSERT_TRUE(bl_eq(expected, bl));
bl.clear();
expected.clear();
r = store->read(ch, hoid, block_size * 2, block_size * 2, bl);
ASSERT_EQ(r, (int)block_size * 2);
expected.append(string(block_size * 2, 'c'));
ASSERT_TRUE(bl_eq(expected, bl));
bl.clear();
expected.clear();
r = store->read(ch, hoid, block_size * 4, block_size, bl);
ASSERT_EQ(r, (int)block_size);
expected.append(string(block_size, 'f'));
ASSERT_TRUE(bl_eq(expected, bl));
bl.clear();
expected.clear();
r = store->read(ch, hoid, block_size * 5, block_size, bl);
ASSERT_EQ(r, (int)block_size);
expected.append(string(block_size, 'd'));
ASSERT_TRUE(bl_eq(expected, bl));
bl.clear();
expected.clear();
r = store->read(ch, hoid, block_size * 5, block_size * 3, bl);
ASSERT_EQ(r, (int)block_size * 3);
expected.append(string(block_size, 'd'));
expected.append(string(block_size * 2, 'e'));
ASSERT_TRUE(bl_eq(expected, bl));
}
ASSERT_EQ(logger->get(l_bluestore_blobs), 1u);
ASSERT_EQ(logger->get(l_bluestore_extents), 1u);
{
ObjectStore::Transaction t;
t.remove(cid, hoid);
t.remove_collection(cid);
cerr << "Cleaning" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
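// BlobReuseOnOverwriteReverse: the same idea, but growing the object backwards --
// prepending before the existing extent, with and without gaps -- and then into
// the next max-blob-sized slot.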
TEST_P(StoreTestSpecificAUSize, BlobReuseOnOverwriteReverse)
{
if (string(GetParam()) != "bluestore")
return;
size_t block_size = 4096;
StartDeferred(block_size);
SetVal(g_conf, "bluestore_max_blob_size", "65536");
g_conf->apply_changes(NULL);
int r;
coll_t cid;
ghobject_t hoid(hobject_t("test_hint", "", CEPH_NOSNAP, 0, -1, ""));
auto ch = store->create_new_collection(cid);
const PerfCounters* logger = store->get_perf_counters();
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
bufferlist bl;
bl.append(std::string(block_size * 2, 'a'));
t.write(cid, hoid, block_size * 10, bl.length(), bl,
CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
// prepend existing
ObjectStore::Transaction t;
bufferlist bl;
bl.append(std::string(block_size, 'b'));
t.write(cid, hoid, block_size * 9, bl.length(), bl,
CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
// We need to issue a read to trigger cache stat update that refresh
// perf counters. additionally we need to wait some time for mempool
// thread to update stats.
sleep(1);
bufferlist bl, expected;
r = store->read(ch, hoid, block_size * 9, block_size * 2, bl);
ASSERT_EQ(r, (int)block_size * 2);
expected.append(string(block_size, 'b'));
expected.append(string(block_size, 'a'));
ASSERT_TRUE(bl_eq(expected, bl));
ASSERT_EQ(logger->get(l_bluestore_blobs), 1u);
ASSERT_EQ(logger->get(l_bluestore_extents), 1u);
}
{
// prepend existing with a gap
ObjectStore::Transaction t;
bufferlist bl;
bl.append(std::string(block_size, 'c'));
t.write(cid, hoid, block_size * 7, bl.length(), bl,
CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
// We need to issue a read to trigger cache stat update that refresh
// perf counters. additionally we need to wait some time for mempool
// thread to update stats.
sleep(1);
bufferlist bl, expected;
r = store->read(ch, hoid, block_size * 7, block_size * 3, bl);
ASSERT_EQ(r, (int)block_size * 3);
expected.append(string(block_size, 'c'));
expected.append(string(block_size, 0));
expected.append(string(block_size, 'b'));
ASSERT_TRUE(bl_eq(expected, bl));
ASSERT_EQ(logger->get(l_bluestore_blobs), 1u);
ASSERT_EQ(logger->get(l_bluestore_extents), 2u);
}
{
// append after existing with a gap
ObjectStore::Transaction t;
bufferlist bl;
bl.append(std::string(block_size, 'd'));
t.write(cid, hoid, block_size * 13, bl.length(), bl,
CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
// We need to issue a read to trigger cache stat update that refresh
// perf counters. additionally we need to wait some time for mempool
// thread to update stats.
sleep(1);
bufferlist bl, expected;
r = store->read(ch, hoid, block_size * 11, block_size * 3, bl);
ASSERT_EQ(r, (int)block_size * 3);
expected.append(string(block_size, 'a'));
expected.append(string(block_size, 0));
expected.append(string(block_size, 'd'));
ASSERT_TRUE(bl_eq(expected, bl));
ASSERT_EQ(logger->get(l_bluestore_blobs), 1u);
ASSERT_EQ(logger->get(l_bluestore_extents), 3u);
}
{
// append twice to the next max_blob slot
ObjectStore::Transaction t;
bufferlist bl;
bl.append(std::string(block_size, 'e'));
t.write(cid, hoid, block_size * 17, bl.length(), bl,
CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
t.write(cid, hoid, block_size * 19, bl.length(), bl,
CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
// We need to issue a read to trigger cache stat update that refresh
// perf counters. additionally we need to wait some time for mempool
// thread to update stats.
sleep(1);
bufferlist bl, expected;
r = store->read(ch, hoid, block_size * 17, block_size * 3, bl);
ASSERT_EQ(r, (int)block_size * 3);
expected.append(string(block_size, 'e'));
expected.append(string(block_size, 0));
expected.append(string(block_size, 'e'));
ASSERT_TRUE(bl_eq(expected, bl));
ASSERT_EQ(logger->get(l_bluestore_blobs), 2u);
ASSERT_EQ(logger->get(l_bluestore_extents), 5u);
}
{
// fill gaps at the second slot
ObjectStore::Transaction t;
bufferlist bl;
bl.append(std::string(block_size, 'f'));
t.write(cid, hoid, block_size * 16, bl.length(), bl,
CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
t.write(cid, hoid, block_size * 18, bl.length(), bl,
CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
// We need to issue a read to trigger cache stat update that refresh
// perf counters. additionally we need to wait some time for mempool
// thread to update stats.
sleep(1);
bufferlist bl, expected;
r = store->read(ch, hoid, block_size * 16, block_size * 4, bl);
ASSERT_EQ(r, (int)block_size * 4);
expected.append(string(block_size, 'f'));
expected.append(string(block_size, 'e'));
expected.append(string(block_size, 'f'));
expected.append(string(block_size, 'e'));
ASSERT_TRUE(bl_eq(expected, bl));
ASSERT_EQ(logger->get(l_bluestore_blobs), 2u);
ASSERT_EQ(logger->get(l_bluestore_extents), 4u);
}
{
ObjectStore::Transaction t;
t.remove(cid, hoid);
t.remove_collection(cid);
cerr << "Cleaning" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
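// BlobReuseOnSmallOverwrite: a 3-byte write into the hole between two allocated
// blocks must be absorbed by the existing blob (blob count stays 1, extents grow to 3).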
TEST_P(StoreTestSpecificAUSize, BlobReuseOnSmallOverwrite)
{
if (string(GetParam()) != "bluestore")
return;
size_t block_size = 4096;
StartDeferred(block_size);
SetVal(g_conf, "bluestore_max_blob_size", "65536");
g_conf->apply_changes(NULL);
int r;
coll_t cid;
ghobject_t hoid(hobject_t("test_hint", "", CEPH_NOSNAP, 0, -1, ""));
const PerfCounters* logger = store->get_perf_counters();
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
bufferlist bl;
bl.append(std::string(block_size, 'a'));
t.write(cid, hoid, 0, bl.length(), bl, CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
t.write(cid, hoid, block_size * 2, bl.length(), bl,
CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
// write small into the gap
ObjectStore::Transaction t;
bufferlist bl;
bl.append(std::string(3, 'b'));
t.write(cid, hoid, block_size + 1, bl.length(), bl,
CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
// We need to issue a read to trigger cache stat update that refresh
// perf counters. additionally we need to wait some time for mempool
// thread to update stats.
sleep(1);
bufferlist bl, expected;
r = store->read(ch, hoid, 0, block_size * 3, bl);
ASSERT_EQ(r, (int)block_size * 3);
expected.append(string(block_size, 'a'));
expected.append(string(1, 0));
expected.append(string(3, 'b'));
expected.append(string(block_size - 4, 0));
expected.append(string(block_size, 'a'));
ASSERT_TRUE(bl_eq(expected, bl));
ASSERT_EQ(logger->get(l_bluestore_blobs), 1u);
ASSERT_EQ(logger->get(l_bluestore_extents), 3u);
}
{
ObjectStore::Transaction t;
t.remove(cid, hoid);
t.remove_collection(cid);
cerr << "Cleaning" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
// This test case reproduces an issue where a write lands in the zero space
// between two extents that share the same spanning blob while the shard map
// is not loaded. The second extent could end up filled with zeros because
// has_any_extents() in do_write_small() returned a wrong result, caused by an
// incompletely loaded extent map.
TEST_P(StoreTestSpecificAUSize, SmallWriteOnShardedExtents)
{
if (string(GetParam()) != "bluestore")
return;
size_t block_size = 0x10000;
StartDeferred(block_size);
SetVal(g_conf, "bluestore_csum_type", "xxhash64");
SetVal(g_conf, "bluestore_max_blob_size", "524288"); // for sure
g_conf->apply_changes(NULL);
int r;
coll_t cid;
ghobject_t hoid1(hobject_t(sobject_t("Object 1", CEPH_NOSNAP)));
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
//doing some tricks to have sharded extents/spanning objects
ObjectStore::Transaction t;
bufferlist bl, bl2;
bl.append(std::string(0x80000, 'a'));
t.write(cid, hoid1, 0, bl.length(), bl, 0);
t.zero(cid, hoid1, 0x719e0, 0x75b0 );
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
bl2.append(std::string(0x70000, 'b'));
t.write(cid, hoid1, 0, bl2.length(), bl2, 0);
t.zero(cid, hoid1, 0, 0x50000);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
ch.reset();
store->umount();
store->mount();
ch = store->open_collection(cid);
{
// do a write to zero space in between some extents sharing the same blob
ObjectStore::Transaction t;
bufferlist bl, bl2;
bl.append(std::string(0x6520, 'c'));
t.write(cid, hoid1, 0x71c00, bl.length(), bl, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
bufferlist bl, expected;
r = store->read(ch, hoid1, 0x70000, 0x9c00, bl);
ASSERT_EQ(r, (int)0x9c00);
expected.append(string(0x19e0, 'a'));
expected.append(string(0x220, 0));
expected.append(string(0x6520, 'c'));
expected.append(string(0xe70, 0));
expected.append(string(0xc70, 'a'));
ASSERT_TRUE(bl_eq(expected, bl));
bl.clear();
}
{
ObjectStore::Transaction t;
t.remove(cid, hoid1);
t.remove_collection(cid);
cerr << "Cleaning" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
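// ExcessiveFragmentation: interleaves two ~400MB objects and then partially
// overwrites them so the free space becomes heavily fragmented; removing one
// object frees a lot of space and triggers a bluefs rebalance that has to cope
// without any 1MB-contiguous pextent being available.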
TEST_P(StoreTestSpecificAUSize, ExcessiveFragmentation)
{
if (string(GetParam()) != "bluestore")
return;
SetVal(g_conf, "bluestore_block_size",
stringify((uint64_t)2048 * 1024 * 1024).c_str());
ASSERT_EQ(g_conf->get_val<uint64_t>("bluefs_alloc_size"),
1024 * 1024);
size_t block_size = 0x10000;
StartDeferred(block_size);
int r;
coll_t cid;
ghobject_t hoid1(hobject_t(sobject_t("Object 1", CEPH_NOSNAP)));
ghobject_t hoid2(hobject_t(sobject_t("Object 2", CEPH_NOSNAP)));
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
// create 2x400MB objects in a way that their pextents are interleaved
ObjectStore::Transaction t;
bufferlist bl;
bl.append(std::string(block_size * 4, 'a')); // 256KB
uint64_t offs = 0;
while(offs < (uint64_t)400 * 1024 * 1024) {
t.write(cid, hoid1, offs, bl.length(), bl, 0);
t.write(cid, hoid2, offs, bl.length(), bl, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
offs += bl.length();
if( (offs % (100 * 1024 * 1024)) == 0) {
std::cout<<"written " << offs << std::endl;
}
}
}
std::cout<<"written 800MB"<<std::endl;
{
// Partially overwrite each object (~112MB), leaving the freed space
// fragmented while the still-unfragmented space stays at the end.
// So there is enough free space overall, but it lacks long enough
// (e.g. 1MB) contiguous pextents.
ObjectStore::Transaction t;
bufferlist bl;
bl.append(std::string(block_size * 4, 'a'));
uint64_t offs = 0;
while(offs < 112 * 1024 * 1024) {
t.write(cid, hoid1, offs, bl.length(), bl, 0);
t.write(cid, hoid2, offs, bl.length(), bl, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
// this will produce high fragmentation if original allocations
// were contiguous
offs += bl.length();
if( (offs % (10 * 1024 * 1024)) == 0) {
std::cout<<"written " << offs << std::endl;
}
}
}
{
// remove one of the object producing much free space
// and hence triggering bluefs rebalance.
// Which should fail as there is no long enough pextents.
ObjectStore::Transaction t;
t.remove(cid, hoid2);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
auto to_sleep = 5 *
(int)g_conf->get_val<double>("bluestore_bluefs_balance_interval");
std::cout<<"sleeping... " << std::endl;
sleep(to_sleep);
{
// touch another object to trigger rebalance
ObjectStore::Transaction t;
t.touch(cid, hoid1);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
t.remove(cid, hoid1);
t.remove(cid, hoid2);
t.remove_collection(cid);
cerr << "Cleaning" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
#endif //#if defined(WITH_BLUESTORE)
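// KVDBHistogramTest: writes 200 small objects and dumps a histogram of the backing
// key-value database via generate_db_histogram(); bluestore only.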
TEST_P(StoreTest, KVDBHistogramTest)
{
if (string(GetParam()) != "bluestore")
return;
int NUM_OBJS = 200;
int r = 0;
coll_t cid;
string base("testobj.");
bufferlist a;
bufferptr ap(0x1000);
memset(ap.c_str(), 'a', 0x1000);
a.append(ap);
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
for (int i = 0; i < NUM_OBJS; ++i) {
ObjectStore::Transaction t;
char buf[100];
snprintf(buf, sizeof(buf), "%d", i);
ghobject_t hoid(hobject_t(sobject_t(base + string(buf), CEPH_NOSNAP)));
t.write(cid, hoid, 0, 0x1000, a);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
Formatter *f = Formatter::create("store_test", "json-pretty", "json-pretty");
store->generate_db_histogram(f);
f->flush(cout);
cout << std::endl;
}
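// KVDBStatsTest: same workload, but with rocksdb_perf and the rocksdb_collect_*
// options enabled; the store is remounted first so the settings take effect, then
// get_db_statistics() is dumped.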
TEST_P(StoreTest, KVDBStatsTest)
{
if (string(GetParam()) != "bluestore")
return;
SetVal(g_conf, "rocksdb_perf", "true");
SetVal(g_conf, "rocksdb_collect_compaction_stats", "true");
SetVal(g_conf, "rocksdb_collect_extended_stats","true");
SetVal(g_conf, "rocksdb_collect_memory_stats","true");
g_ceph_context->_conf->apply_changes(NULL);
int r = store->umount();
ASSERT_EQ(r, 0);
r = store->mount(); //to force rocksdb stats
ASSERT_EQ(r, 0);
int NUM_OBJS = 200;
coll_t cid;
string base("testobj.");
bufferlist a;
bufferptr ap(0x1000);
memset(ap.c_str(), 'a', 0x1000);
a.append(ap);
auto ch = store->create_new_collection(cid);
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
for (int i = 0; i < NUM_OBJS; ++i) {
ObjectStore::Transaction t;
char buf[100];
snprintf(buf, sizeof(buf), "%d", i);
ghobject_t hoid(hobject_t(sobject_t(base + string(buf), CEPH_NOSNAP)));
t.write(cid, hoid, 0, 0x1000, a);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
Formatter *f = Formatter::create("store_test", "json-pretty", "json-pretty");
store->get_db_statistics(f);
f->flush(cout);
cout << std::endl;
}
#if defined(WITH_BLUESTORE)
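// garbageCollection: forces compression and uses the WRITE_AT macro to lay down
// overlapping writes; compressed_allocated from statfs() and the
// l_bluestore_gc_merged perf counter show when garbage collection merges the
// partially overwritten compressed blobs.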
TEST_P(StoreTestSpecificAUSize, garbageCollection)
{
int r;
coll_t cid;
int buf_len = 256 * 1024;
int overlap_offset = 64 * 1024;
int write_offset = buf_len;
if (string(GetParam()) != "bluestore")
return;
#define WRITE_AT(offset, _length) {\
ObjectStore::Transaction t;\
if ((uint64_t)_length != bl.length()) { \
buffer::ptr p(bl.c_str(), _length);\
bufferlist bl_tmp;\
bl_tmp.push_back(p);\
t.write(cid, hoid, offset, bl_tmp.length(), bl_tmp);\
} else {\
t.write(cid, hoid, offset, bl.length(), bl);\
}\
r = queue_transaction(store, ch, std::move(t));\
ASSERT_EQ(r, 0);\
}
StartDeferred(65536);
SetVal(g_conf, "bluestore_compression_max_blob_size", "524288");
SetVal(g_conf, "bluestore_compression_min_blob_size", "262144");
SetVal(g_conf, "bluestore_compression_mode", "force");
g_conf->apply_changes(NULL);
auto ch = store->create_new_collection(cid);
ghobject_t hoid(hobject_t(sobject_t("Object 1", CEPH_NOSNAP)));
{
bufferlist in;
r = store->read(ch, hoid, 0, 5, in);
ASSERT_EQ(-ENOENT, r);
}
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
cerr << "Creating collection " << cid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
std::string data;
data.resize(buf_len);
{
{
bool exists = store->exists(ch, hoid);
ASSERT_TRUE(!exists);
ObjectStore::Transaction t;
t.touch(cid, hoid);
cerr << "Creating object " << hoid << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
exists = store->exists(ch, hoid);
ASSERT_EQ(true, exists);
}
bufferlist bl;
for(size_t i = 0; i < data.size(); i++)
data[i] = i % 256;
bl.append(data);
{
struct store_statfs_t statfs;
WRITE_AT(0, buf_len);
int r = store->statfs(&statfs);
ASSERT_EQ(r, 0);
ASSERT_EQ(statfs.compressed_allocated, 0x10000);
}
{
struct store_statfs_t statfs;
WRITE_AT(write_offset - 2 * overlap_offset, buf_len);
int r = store->statfs(&statfs);
ASSERT_EQ(r, 0);
ASSERT_EQ(statfs.compressed_allocated, 0x20000);
const PerfCounters* counters = store->get_perf_counters();
ASSERT_EQ(counters->get(l_bluestore_gc_merged), 0u);
}
{
struct store_statfs_t statfs;
WRITE_AT(write_offset - overlap_offset, buf_len);
int r = store->statfs(&statfs);
ASSERT_EQ(r, 0);
ASSERT_EQ(statfs.compressed_allocated, 0x20000);
const PerfCounters* counters = store->get_perf_counters();
ASSERT_EQ(counters->get(l_bluestore_gc_merged), 0x10000u);
}
{
struct store_statfs_t statfs;
WRITE_AT(write_offset - 3 * overlap_offset, buf_len);
int r = store->statfs(&statfs);
ASSERT_EQ(r, 0);
ASSERT_EQ(statfs.compressed_allocated, 0x20000);
const PerfCounters* counters = store->get_perf_counters();
ASSERT_EQ(counters->get(l_bluestore_gc_merged), 0x20000u);
}
{
struct store_statfs_t statfs;
WRITE_AT(write_offset + 1, overlap_offset-1);
int r = store->statfs(&statfs);
ASSERT_EQ(r, 0);
ASSERT_EQ(statfs.compressed_allocated, 0x20000);
const PerfCounters* counters = store->get_perf_counters();
ASSERT_EQ(counters->get(l_bluestore_gc_merged), 0x20000u);
}
{
struct store_statfs_t statfs;
WRITE_AT(write_offset + 1, overlap_offset);
int r = store->statfs(&statfs);
ASSERT_EQ(r, 0);
ASSERT_EQ(statfs.compressed_allocated, 0x10000);
const PerfCounters* counters = store->get_perf_counters();
ASSERT_EQ(counters->get(l_bluestore_gc_merged), 0x3ffffu);
}
{
struct store_statfs_t statfs;
WRITE_AT(0, buf_len-1);
int r = store->statfs(&statfs);
ASSERT_EQ(r, 0);
ASSERT_EQ(statfs.compressed_allocated, 0x10000);
const PerfCounters* counters = store->get_perf_counters();
ASSERT_EQ(counters->get(l_bluestore_gc_merged), 0x40001u);
}
SetVal(g_conf, "bluestore_gc_enable_total_threshold", "1"); //forbid GC when saving = 0
{
struct store_statfs_t statfs;
WRITE_AT(1, overlap_offset-2);
WRITE_AT(overlap_offset * 2 + 1, overlap_offset-2);
int r = store->statfs(&statfs);
ASSERT_EQ(r, 0);
ASSERT_EQ(statfs.compressed_allocated, 0x10000);
const PerfCounters* counters = store->get_perf_counters();
ASSERT_EQ(counters->get(l_bluestore_gc_merged), 0x40001u);
}
{
struct store_statfs_t statfs;
WRITE_AT(overlap_offset + 1, overlap_offset-2);
int r = store->statfs(&statfs);
ASSERT_EQ(r, 0);
ASSERT_EQ(statfs.compressed_allocated, 0x0);
const PerfCounters* counters = store->get_perf_counters();
ASSERT_EQ(counters->get(l_bluestore_gc_merged), 0x40007u);
}
{
ObjectStore::Transaction t;
t.remove(cid, hoid);
cerr << "Cleaning" << std::endl;
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
}
}
TEST_P(StoreTestSpecificAUSize, fsckOnUnalignedDevice)
{
if (string(GetParam()) != "bluestore")
return;
SetVal(g_conf, "bluestore_block_size",
stringify(0x280005000).c_str()); //10 Gb + 20K
SetVal(g_conf, "bluestore_fsck_on_mount", "false");
SetVal(g_conf, "bluestore_fsck_on_umount", "false");
StartDeferred(0x4000);
store->umount();
ASSERT_EQ(store->fsck(false), 0); // do fsck explicitly
store->mount();
}
TEST_P(StoreTestSpecificAUSize, fsckOnUnalignedDevice2)
{
if (string(GetParam()) != "bluestore")
return;
SetVal(g_conf, "bluestore_block_size",
stringify(0x280005000).c_str()); //10 Gb + 20K
SetVal(g_conf, "bluestore_fsck_on_mount", "false");
SetVal(g_conf, "bluestore_fsck_on_umount", "false");
StartDeferred(0x1000);
store->umount();
ASSERT_EQ(store->fsck(false), 0); // do fsck explicitly
store->mount();
}
TEST_P(StoreTest, BluestoreRepairTest)
{
if (string(GetParam()) != "bluestore")
return;
const size_t offs_base = 65536 / 2;
SetVal(g_conf, "bluestore_fsck_on_mount", "false");
SetVal(g_conf, "bluestore_fsck_on_umount", "false");
SetVal(g_conf, "bluestore_max_blob_size",
stringify(2 * offs_base).c_str());
SetVal(g_conf, "bluestore_extent_map_shard_max_size", "12000");
g_ceph_context->_conf->apply_changes(NULL);
BlueStore* bstore = dynamic_cast<BlueStore*> (store.get());
// fill the store with some data
coll_t cid(spg_t(pg_t(0,555), shard_id_t::NO_SHARD));
auto ch = store->create_new_collection(cid);
ghobject_t hoid(hobject_t(sobject_t("Object 1", CEPH_NOSNAP)));
ghobject_t hoid_dup(hobject_t(sobject_t("Object 1(dup)", CEPH_NOSNAP)));
ghobject_t hoid2(hobject_t(sobject_t("Object 2", CEPH_NOSNAP)));
ghobject_t hoid_cloned = hoid2;
hoid_cloned.hobj.snap = 1;
ghobject_t hoid3(hobject_t(sobject_t("Object 3", CEPH_NOSNAP)));
ghobject_t hoid3_cloned = hoid3;
hoid3_cloned.hobj.snap = 1;
bufferlist bl;
bl.append("1234512345");
int r;
const size_t repeats = 16;
{
auto ch = store->create_new_collection(cid);
cerr << "create collection + write" << std::endl;
ObjectStore::Transaction t;
t.create_collection(cid, 0);
for( auto i = 0ul; i < repeats; ++i ) {
t.write(cid, hoid, i * offs_base, bl.length(), bl);
t.write(cid, hoid_dup, i * offs_base, bl.length(), bl);
}
for( auto i = 0ul; i < repeats; ++i ) {
t.write(cid, hoid2, i * offs_base, bl.length(), bl);
}
t.clone(cid, hoid2, hoid_cloned);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
bstore->umount();
//////////// leaked pextent fix ////////////
cerr << "fix leaked pextents" << std::endl;
ASSERT_EQ(bstore->fsck(false), 0);
ASSERT_EQ(bstore->repair(false), 0);
bstore->mount();
bstore->inject_leaked(0x30000);
bstore->umount();
ASSERT_EQ(bstore->fsck(false), 1);
ASSERT_EQ(bstore->repair(false), 0);
ASSERT_EQ(bstore->fsck(false), 0);
//////////// false free fix ////////////
cerr << "fix false free pextents" << std::endl;
bstore->mount();
bstore->inject_false_free(cid, hoid);
bstore->umount();
ASSERT_EQ(bstore->fsck(false), 2);
ASSERT_EQ(bstore->repair(false), 0);
ASSERT_EQ(bstore->fsck(false), 0);
//////////// verify invalid statfs ///////////
cerr << "fix invalid statfs" << std::endl;
store_statfs_t statfs0, statfs;
bstore->mount();
ASSERT_EQ(bstore->statfs(&statfs0), 0);
statfs = statfs0;
statfs.allocated += 0x10000;
statfs.stored += 0x10000;
ASSERT_FALSE(statfs0 == statfs);
bstore->inject_statfs(statfs);
bstore->umount();
ASSERT_EQ(bstore->fsck(false), 1);
ASSERT_EQ(bstore->repair(false), 0);
ASSERT_EQ(bstore->fsck(false), 0);
ASSERT_EQ(bstore->mount(), 0);
ASSERT_EQ(bstore->statfs(&statfs), 0);
// adjust free space to success in comparison
statfs0.available = statfs.available;
ASSERT_EQ(statfs0, statfs);
///////// undecodable shared blob key / stray shared blob records ///////
cerr << "undecodable shared blob key" << std::endl;
bstore->inject_broken_shared_blob_key("undec1",
bufferlist());
bstore->inject_broken_shared_blob_key("undecodable key 2",
bufferlist());
bstore->inject_broken_shared_blob_key("undecodable key 3",
bufferlist());
bstore->umount();
ASSERT_EQ(bstore->fsck(false), 3);
ASSERT_EQ(bstore->repair(false), 0);
ASSERT_EQ(bstore->fsck(false), 0);
cerr << "misreferencing" << std::endl;
bstore->mount();
bstore->inject_misreference(cid, hoid, cid, hoid_dup, 0);
bstore->inject_misreference(cid, hoid, cid, hoid_dup, (offs_base * repeats) / 2);
bstore->inject_misreference(cid, hoid, cid, hoid_dup, offs_base * (repeats -1) );
bstore->umount();
ASSERT_EQ(bstore->fsck(false), 6);
ASSERT_EQ(bstore->repair(false), 0);
ASSERT_EQ(bstore->fsck(true), 0);
// reproducing issues #21040 & 20983
SetVal(g_conf, "bluestore_debug_inject_bug21040", "true");
g_ceph_context->_conf->apply_changes(NULL);
bstore->mount();
cerr << "repro bug #21040" << std::endl;
{
auto ch = store->open_collection(cid);
{
ObjectStore::Transaction t;
bl.append("0123456789012345");
t.write(cid, hoid3, offs_base, bl.length(), bl);
bl.clear();
bl.append('!');
t.write(cid, hoid3, 0, bl.length(), bl);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
ObjectStore::Transaction t;
t.clone(cid, hoid3, hoid3_cloned);
r = queue_transaction(store, ch, std::move(t));
ASSERT_EQ(r, 0);
}
bstore->umount();
ASSERT_EQ(bstore->fsck(false), 3);
ASSERT_LE(bstore->repair(false), 0);
ASSERT_EQ(bstore->fsck(false), 0);
SetVal(g_conf, "bluestore_debug_inject_bug21040", "true");
g_ceph_context->_conf->apply_changes(NULL);
}
cerr << "Completing" << std::endl;
bstore->mount();
}
TEST_P(StoreTest, BluestoreStatistics)
{
if (string(GetParam()) != "bluestore")
return;
SetVal(g_conf, "rocksdb_perf", "true");
SetVal(g_conf, "rocksdb_collect_compaction_stats", "true");
SetVal(g_conf, "rocksdb_collect_extended_stats","true");
SetVal(g_conf, "rocksdb_collect_memory_stats","true");
// disable cache
SetVal(g_conf, "bluestore_cache_size_ssd", "0");
SetVal(g_conf, "bluestore_cache_size_hdd", "0");
SetVal(g_conf, "bluestore_cache_size", "0");
g_ceph_context->_conf->apply_changes(NULL);
int r = store->umount();
ASSERT_EQ(r, 0);
r = store->mount();
ASSERT_EQ(r, 0);
BlueStore* bstore = NULL;
EXPECT_NO_THROW(bstore = dynamic_cast<BlueStore*> (store.get()));
coll_t cid;
ghobject_t hoid(hobject_t("test_db_statistics", "", CEPH_NOSNAP, 0, 0, ""));
auto ch = bstore->create_new_collection(cid);
bufferlist bl;
bl.append("0123456789abcdefghi");
{
ObjectStore::Transaction t;
t.create_collection(cid, 0);
t.touch(cid, hoid);
t.write(cid, hoid, 0, bl.length(), bl);
cerr << "Write object" << std::endl;
r = queue_transaction(bstore, ch, std::move(t));
ASSERT_EQ(r, 0);
}
{
bufferlist readback;
r = store->read(ch, hoid, 0, bl.length(), readback);
ASSERT_EQ(r, bl.length());
ASSERT_TRUE(bl_eq(bl, readback));
}
Formatter *f = Formatter::create("store_test", "json-pretty", "json-pretty");
EXPECT_NO_THROW(store->get_db_statistics(f));
f->flush(cout);
cout << std::endl;
}
#endif // WITH_BLUESTORE
int main(int argc, char **argv)
{
vector<const char*> args;
argv_to_vec(argc, (const char **)argv, args);
auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT,
CODE_ENVIRONMENT_UTILITY,
CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
common_init_finish(g_ceph_context);
// make sure we can adjust any config settings
g_ceph_context->_conf->_clear_safe_to_start_threads();
g_ceph_context->_conf->set_val_or_die("osd_journal_size", "400");
g_ceph_context->_conf->set_val_or_die("filestore_index_retry_probability", "0.5");
g_ceph_context->_conf->set_val_or_die("filestore_op_thread_timeout", "1000");
g_ceph_context->_conf->set_val_or_die("filestore_op_thread_suicide_timeout", "10000");
//g_ceph_context->_conf->set_val_or_die("filestore_fiemap", "true");
g_ceph_context->_conf->set_val_or_die("bluestore_fsck_on_mkfs", "false");
g_ceph_context->_conf->set_val_or_die("bluestore_fsck_on_mount", "false");
g_ceph_context->_conf->set_val_or_die("bluestore_fsck_on_umount", "false");
g_ceph_context->_conf->set_val_or_die("bluestore_debug_misc", "true");
g_ceph_context->_conf->set_val_or_die("bluestore_debug_small_allocations", "4");
g_ceph_context->_conf->set_val_or_die("bluestore_debug_freelist", "true");
g_ceph_context->_conf->set_val_or_die("bluestore_clone_cow", "true");
g_ceph_context->_conf->set_val_or_die("bluestore_max_alloc_size", "196608");
// set small cache sizes so we see trimming during Synthetic tests
g_ceph_context->_conf->set_val_or_die("bluestore_cache_size_hdd", "4000000");
g_ceph_context->_conf->set_val_or_die("bluestore_cache_size_ssd", "4000000");
// very short *_max prealloc so that we fall back to async submits
g_ceph_context->_conf->set_val_or_die("bluestore_blobid_prealloc", "10");
g_ceph_context->_conf->set_val_or_die("bluestore_nid_prealloc", "10");
g_ceph_context->_conf->set_val_or_die("bluestore_debug_randomize_serial_transaction",
"10");
g_ceph_context->_conf->set_val_or_die("bdev_debug_aio", "true");
// specify device size
g_ceph_context->_conf->set_val_or_die("bluestore_block_size",
stringify(DEF_STORE_TEST_BLOCKDEV_SIZE));
g_ceph_context->_conf->set_val_or_die(
"enable_experimental_unrecoverable_data_corrupting_features", "*");
g_ceph_context->_conf->apply_changes(NULL);
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
/*
* Local Variables:
* compile-command: "cd ../.. ; make ceph_test_objectstore &&
* ./ceph_test_objectstore \
* --gtest_filter=*.collect_metadata* --log-to-stderr=true --debug-filestore=20
* "
* End:
*/

1.2 模板元编程

模板示例

这里自己实现一个判断两个对象是否相等的模板函数。同时也定义一个模板类。

#include <iostream>
template<typename T>
bool equ(const T &a, const T &b) {
return !(a < b) && !(b < a);
}
template<typename T>
class number {
public:
number(const T &v): v_(v) { }
bool operator < (const number &n) const ;
private:
T v_;
};
template<typename T>
bool number<T>::operator <(const number &n) const {
return v_ < n.v_;
}
int main(void) {
number<int> a(1), b(1);
std::cout << equ(a, b) << std::endl;
std::cout << equ<double>(1.0, 2.0) << std::endl;
return 0;
}

模板的参数

  • 类型参数(type template parameter),用 typename 或 class 标记;(最好是用typename)
  • 非类型参数(non-type template parameter)可以是:整数及枚举类型、对象或函数的指针、对象或函数的引用、对象的成员指针,非类型参数是模板实例的常量;
  • 模板型参数(template template parameter),如template<typename T, template<typename> class A> someclass {};
  • 模板参数可以有默认值(函数模板的默认模板参数从 C++11 开始支持);
  • 函数模板中与函数实参类型相关的模板参数可以自动推导;类模板参数不存在这种推导机制(C++17 之前);
  • C++11 引入变长模板参数。

总结一下:

  • 模板实参必须在编译期确定:无论是常数、类型、自定义类型还是模板,都必须是编译期常量或编译期已知的类型,运行时的变量是不能用的。
  • 类和函数模板参数都可以有默认值
  • 函数可以自动推导,类模板不可以。
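
下面用一段小例子(示意)把上面几种模板参数放在一起演示:带默认值的非类型参数、模板型参数,以及 C++11 的变长模板参数。

#include <iostream>
#include <memory>
#include <vector>
// Container 是模板型参数(template template parameter),N 是带默认值的非类型参数
template<typename T, int N = 4,
template<typename, typename> class Container = std::vector>
struct Buffer {
Container<T, std::allocator<T>> data;
static constexpr int capacity = N;
};
// C++11 变长模板参数:递归展开求和
template<typename T>
T sum(T v) { return v; }
template<typename T, typename... Rest>
T sum(T v, Rest... rest) { return v + sum(rest...); }
int main(void) {
Buffer<int> b; // N 和 Container 都取默认值
std::cout << b.capacity << std::endl; // 4
std::cout << sum(1, 2, 3, 4) << std::endl; // 10,类型参数由实参自动推导
return 0;
}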

运行时变量不可以

#include <iostream>
template<typename T, int N>
struct Vec{
T v_[N];
};
template<>
struct Vec<float, 4> {
float v_[4];
};
template<int N>
struct Vec<bool, N>{
char v_[(N + 7) / 8]; // 按位压缩存储 N 个 bool
};
int main(void) {
int N = 10; // 这里是一个变量
Vec<bool, N> bits; // Compile ERROR! 利用变量来模板化类
return 0;
}

那么函数是否可以呢?
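
答案是:函数模板的「类型参数」可以由运行时实参的类型自动推导,所以传运行时变量没有问题;但「非类型模板参数」同样必须是编译期常量。下面是一个简单示意:

#include <iostream>
template<typename T>
T square(T x) { return x * x; } // 类型参数 T 可以由实参类型推导
template<int N>
int times(int x) { return x * N; } // 非类型参数 N 必须是编译期常量
int main(void) {
int n = 10; // 运行时变量
std::cout << square(n) << std::endl; // OK:T 由 n 的类型(int)推导出来
std::cout << times<3>(n) << std::endl; // OK:3 是编译期常量
// std::cout << times<n>(5) << std::endl; // 编译错误:n 是运行时变量
return 0;
}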

特化

特化的时候,可以:

  • Base模板里面的类具体化。
  • 只具体化某个模板参数,其他的保留。

所有的参数具体化

template<typename T, int N>
struct Vec{
T v_[N];
};
template<>
struct Vec<float, 4> {
float v_[4];
};

比如在base类里面声明了两个参数,这两个参数都可以具体化。T = float, N = 4

具体化某部分参数

base类的参数是typename T, int N,那么可不可以只具体化typename T = bool呢?答案是可以的。

template<typename T, int N>
struct Vec{
T v_[N];
};
template<int N>
struct Vec<bool, N> {
char v_[(N + 7) / 8]; // 按位压缩存储 N 个 bool
};

术语:

  • 最简单的情况是对每个模板参数都指定一个具体值,这称为完全特例化(full specialization);

  • 另外,可以限制模板参数在一个范围取值或满足一定关系等,这称为部分特例化(partial specialization)。

用数学上集合的概念,通例模板参数所有可取的值组合构成全集U,完全特例化对U中某个元素进行专门定义,部分特例化对U的某个真子集进行专门定义。
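
把前面两段特化代码放在一起看,实例化时编译器会优先匹配更特殊的版本,示意如下:

Vec<double, 8> a; // 匹配通例:T v_[N]
Vec<float, 4> b; // 匹配完全特例化版本
Vec<bool, 32> c; // 匹配部分特例化版本(T=bool,N 保留为参数)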

1.3 CompatSet

reference

https://blog.csdn.net/litianze99/article/details/74596843

CompatSet是一个结构体类型,其用于对特性兼容性进行管理。该类型的定义位于src/include/CompatSet.h文件中。

为什么

OSD对外提供一些功能特性,这些特性需要OSD后端的存储驱动(或者文件系统,如filestore)支持。如果后端驱动不支持,即两者在某些特性上不兼容,就会影响读写操作,所以谨慎处理这些特性的兼容性是非常重要的。

struct CompatSet {
struct Feature {
uint64_t id;
std::string name;
Feature(uint64_t _id, const std::string& _name) : id(_id), name(_name) {}
};
class FeatureSet {
uint64_t mask;
std::map<uint64_t, std::string> names;
};
// These features have no impact on the read / write status
FeatureSet compat;
// If any of these features are missing, read is possible ( as long
// as no incompat feature is missing ) but it is not possible to write
FeatureSet ro_compat;
// If any of these features are missing, read or write is not possible
FeatureSet incompat;
// ...(其余成员与方法略)
};

Feature

该类型包含两个重要属性:

  • id:特性的唯一标识
  • name:特性的名字

FeatureSet

该类型包含两个重要属性:

  • mask:标识该组特性的位图
  • names:是一个map,key为特性的id,value为特性的name

属性

该类型中包含了三个重要的属性,分别是 compat、ro_compat、incompat,都是 FeatureSet 实例。

  • compat:该组中的特性支持与否,对读写没有任何影响。
  • ro_compat:该组中的特性,如果不支持,则会影响写入操作,读操作没有影响。
  • incompat:该组中的特性,如果不支持,则会影响读写操作。
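
下面是一个使用示意(特性的 id/名字仅为举例,具体接口以 src/include/CompatSet.h 为准):先往各个 FeatureSet 里 insert 特性,再用 readable/compare 做兼容性判断。

// 示意:构造两个 CompatSet 并做兼容性比较(特性 id/name 为假设的例子)
CompatSet::FeatureSet compat, ro_compat, incompat;
incompat.insert(CompatSet::Feature(1, "base"));
incompat.insert(CompatSet::Feature(2, "pginfo object"));
CompatSet ours(compat, ro_compat, incompat);
CompatSet::FeatureSet other_incompat;
other_incompat.insert(CompatSet::Feature(1, "base"));
CompatSet theirs(compat, ro_compat, other_incompat);
bool can_read = ours.readable(theirs); // true:theirs 要求的 incompat 特性 ours 都有
int cmp = ours.compare(theirs); // 1:ours 的特性集是 theirs 的超集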

主要的功能与接口

readable

// 这个文件系统是否实现了相应的接口,进而可以读取另外一个file system.
/* does this filesystem implementation have the
features required to read the other? */
bool CompatSet::readable(CompatSet const& other) const {
return !((other.incompat.mask ^ incompat.mask) & other.incompat.mask);
}

writeable

// 这个文件系统的实现,是否有需要写other时所需的特性
/* does this filesystem implementation have the
features required to write the other? */
bool writeable(CompatSet const& other) const {
return readable(other) &&
!((other.ro_compat.mask ^ ro_compat.mask) & other.ro_compat.mask);
}

compare

注意这里的比较是基于特性集合的比较,并且不满足交换律:a > b 并不能推出 b < a。返回值的含义如下:

  • 0 表示两者的特性集是一样
  • 1 表示a是b的超集
  • -1表示a缺少b里面至少一个特性
/* Compare this CompatSet to another.
* CAREFULLY NOTE: This operation is NOT commutative.
* a > b DOES NOT imply that b < a.
* If returns:
* 0: The CompatSets have the same feature set.
* 1: This CompatSet's features are a strict superset of the other's.
* -1: This CompatSet is missing at least one feature
* described in the other. It may still have more features, though.
*/
int CompatSet::compare(const CompatSet& other) {
if ((other.compat.mask == compat.mask) &&
(other.ro_compat.mask == ro_compat.mask) &&
(other.incompat.mask == incompat.mask)) return 0;
//okay, they're not the same
//if we're writeable we have a superset of theirs on incompat and ro_compat
if (writeable(other) && !((other.compat.mask ^ compat.mask)
& other.compat.mask)) return 1;
//if we make it here, we weren't writeable or had a difference compat set
return -1;
}

unsupported
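
unsupported(other) 的作用是找出 other 中存在、而本 CompatSet 不支持的那些特性,并把它们组成一个新的 CompatSet 返回,常用于打印"缺少哪些特性"的错误信息。下面是一个语义示意(并非源码原文,实际实现见 CompatSet.h):

// 语义示意:取出 other 有、而 this 没有的那部分 mask
uint64_t missing_compat = (other.compat.mask ^ compat.mask) & other.compat.mask;
uint64_t missing_ro_compat = (other.ro_compat.mask ^ ro_compat.mask) & other.ro_compat.mask;
uint64_t missing_incompat = (other.incompat.mask ^ incompat.mask) & other.incompat.mask;
// 再按位遍历这些缺失的 bit,把对应的 Feature(id, name) 插入返回的 diff(CompatSet)中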

1.4 ObjectStore的接口

标志位

// Flag bits
typedef uint32_t osflagbits_t;
const int SKIP_JOURNAL_REPLAY = 1 << 0;
const int SKIP_MOUNT_OMAP = 1 << 1;

这里工程意义上的写法比较有意思:要表示第 x 位的标志,就写成 1<<x,多个标志之间用按位或组合。
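
例如,要组合、检测和清除这些标志位,可以这样写(示意):

osflagbits_t flags = SKIP_JOURNAL_REPLAY | SKIP_MOUNT_OMAP; // 同时置上第0位和第1位
if (flags & SKIP_MOUNT_OMAP) {
// 第1位被设置:跳过 mount omap
}
flags &= ~SKIP_JOURNAL_REPLAY; // 清除第0位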

成员以及函数

以下部分都是ObjectStore的成员或者成员函数。

路径

ObjectStore上下文以及路径。

// ObjectStore的路径
string path;
CephContext* cct;

工厂方法

很多抽象类在有多个派生实现的时候,都喜欢用这种静态 create 工厂方法:根据类型字符串选择并创建具体实现。

/**
* create - create an ObjectStore instance.
*
* This is invoked once at initialization time.
*
* @param type type of store. This is a string from the configuration file.
* @param data path (or other descriptor) for data
* @param journal path (or other descriptor) for journal (optional)
* @param flags which filestores should check if applicable
*/
static ObjectStore *create(CephContext *cct,
const string& type,
const string& data,
const string& journal,
osflagbits_t flags = 0);

可以看到,这里需要指定:

  • 需要的ObjectStore的类型
  • 需要的ObjectStore的数据区
  • ObjectStore的日志区
  • ObjectStore的标志位

这里的标志位主要是两个:

  1. 是否需要journal重放
  2. 是否需要mount omap

注意看前面给出的两个SKIP_标志位。
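
一个使用示意如下(路径仅为举例,g_ceph_context 为全局的 CephContext):

// 示意:按配置的类型字符串创建具体的 ObjectStore 实现
ObjectStore *os = ObjectStore::create(
g_ceph_context,
"bluestore", // 类型:bluestore/filestore/memstore/kvstore
"/var/lib/ceph/osd/ceph-0", // 数据区路径(仅为示例)
"/var/lib/ceph/osd/ceph-0/journal", // 日志区路径(bluestore 可以不用)
0); // 标志位:不跳过任何步骤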

读取fsid

/**
* probe a block device to learn the uuid of the owning OSD
*
* @param cct cct
* @param path path to device
* @param fsid [out] osd uuid
*/
static int probe_block_device_fsid(
CephContext *cct,
const string& path,
uuid_d *fsid);

这里是用来读取 ObjectStore 的 fsid,也就是看一下这个设备属于哪个 cluster。
不同的 ObjectStore 实现,读取方式是不一样的。

int ObjectStore::probe_block_device_fsid(
CephContext *cct,
const string& path,
uuid_d *fsid)
{
int r;
#if defined(WITH_BLUESTORE)
// first try bluestore -- it has a crc on its header and will fail
// reliably.
r = BlueStore::get_block_device_fsid(cct, path, fsid);
if (r == 0) {
lgeneric_dout(cct, 0) << __func__ << " " << path << " is bluestore, "
<< *fsid << dendl;
return r;
}
#endif
// okay, try FileStore (journal).
r = FileStore::get_block_device_fsid(cct, path, fsid);
if (r == 0) {
lgeneric_dout(cct, 0) << __func__ << " " << path << " is filestore, "
<< *fsid << dendl;
return r;
}
return -EINVAL;
}

从代码的设计上来看,最好是把各种探测逻辑分散到各自的实现里面,而不是在这里通过 if/else 逐个尝试。这里稍微通过 FileStore 展开一下。

int FileStore::get_block_device_fsid(CephContext* cct, const string& path,
uuid_d *fsid)
{
// make sure we don't try to use aio or direct_io (and get annoying
// error messages from failing to do so); performance implications
// should be irrelevant for this use
FileJournal j(cct, *fsid, 0, 0, path.c_str(), false, false);
return j.peek_fsid(*fsid);
}
// This can not be used on an active journal
int FileJournal::peek_fsid(uuid_d& fsid)
{
assert(fd == -1);
int r = _open(false, false);
if (r)
return r;
r = read_header(&header);
if (r < 0)
goto out;
fsid = header.fsid;
out:
close();
return r;
}

可以看出这段代码实际上就是打开 journal,读取 journal 的 header,然后再从 header 中取出 fsid。

获取ObjectStore的性能数据

通过注释可以看出,这里主要是获取 ObjectStore 的 commit/apply 的 latency 信息。

/**
* Fetch Object Store statistics.
*
* Currently only latency of write and apply times are measured.
*
* This appears to be called with nothing locked.
*/
virtual objectstore_perf_stat_t get_cur_stats() = 0;

那么这里可以展开get_cur_stats看一下FileStore.h是如何处理的。

struct FSPerfTracker {
PerfCounters::avg_tracker<uint64_t> os_commit_latency_ns;
PerfCounters::avg_tracker<uint64_t> os_apply_latency_ns;
objectstore_perf_stat_t get_cur_stats() const {
objectstore_perf_stat_t ret;
ret.os_commit_latency_ns = os_commit_latency_ns.current_avg();
ret.os_apply_latency_ns = os_apply_latency_ns.current_avg();
return ret;
}
void update_from_perfcounters(PerfCounters &logger);
} perf_tracker;
objectstore_perf_stat_t get_cur_stats() override {
perf_tracker.update_from_perfcounters(*logger);
return perf_tracker.get_cur_stats();
}

可以看出来,这里更新的变量主要是:

  • os_commit_latency_ns
  • os_apply_latency_ns

拿到性能计数器

/**
* Fetch Object Store performance counters.
*
*
* This appears to be called with nothing locked.
*/
virtual const PerfCounters* get_perf_counters() const = 0;

这里大部分子类,如果存在性能计数器,那么基本上都是一句话:

const PerfCounters* get_perf_counters() const override {
return logger;
}

Collection

Ceph 在通常情况下认为事务之间是没有相关性的,也就是说事务A和事务B可以按任意顺序提交。那么如果用户对某些事务有先后顺序的要求呢?

比如一定要按事务A、事务B、事务C的顺序生效,这个时候就需要利用 Collection 把 A、B、C 排好序并且放在一起。
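
例如,下面三个事务入队到同一个 collection 句柄 ch 上,它们就会按入队顺序依次生效(示意,cid/oid/bl/ch/store 假设已在上下文中定义,queue_transaction 的定义见后文"事务入队"一节):

// 示意:同一个 ch 上的事务按入队顺序 apply,不同 collection 之间可以并行
ObjectStore::Transaction a, b, c;
a.touch(cid, oid);
b.write(cid, oid, 0, bl.length(), bl);
c.setattr(cid, oid, "k", bl);
store->queue_transaction(ch, std::move(a));
store->queue_transaction(ch, std::move(b)); // 在 a 之后 apply
store->queue_transaction(ch, std::move(c)); // 在 b 之后 apply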

/**
* 一个collection里面包含的是一系列有先后顺序的事务
* 在同一个collection里面的事务队列,在apply的时候,必须根据先后顺序一个一个来。
* 在不同collection的事务是可以并行提交的。
*
* ObjectStore users可以得到collection的指针,通过两种方式
* - open_collection()
* - create_new_collection()
*/
struct CollectionImpl : public RefCountedObject {
const coll_t cid;
CollectionImpl(const coll_t& c)
: RefCountedObject(NULL, 0),
cid(c) {}
/// wait for any queued transactions to apply
// block until any previous transactions are visible. specifically,
// collection_list and collection_empty need to reflect prior operations.
// flush函数的作用就是一个一个地apply transactions
// 必须要等到前面的事务都生效之后,后面的事务才可以推进。
// collection_list()和collection_empty()这两个函数
// 需要反映之前的操作。
virtual void flush() = 0;

这里暂时不去管 collection_list 和 collection_empty 这两个函数的具体作用。

/**
* Async flush_commit
* 这个是异步flush commit
* 两种情况:
* 1. collection当前是空闲的:flush_commit返回true,参数c不会被使用(not touched);
* 2. collection并不空闲:这个方法返回false,并且c会被异步回调——
* 一旦在本次调用之前入队到该collection上的事务全部applied/committed,
* 就会以0为参数回调c。
*/
virtual bool flush_commit(Context *c) = 0;
const coll_t &get_cid() { return cid; }
};
// 定义Collection的句柄
typedef boost::intrusive_ptr<CollectionImpl> CollectionHandle;
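
flush_commit 的一个典型用法,是配合 Ceph 的 Context 回调做同步等待(示意,这里假设使用常见的 C_SaferCond):

// 示意:等待 ch 上之前入队的事务全部 applied/committed
C_SaferCond cond;
if (!ch->flush_commit(&cond)) {
cond.wait(); // collection 不空闲:等待异步回调
}
// 到这里,之前的事务对 collection_list/collection_empty 等已经可见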

Object的内容与语义

ObjectStore 里的所有 objects 都由 ghobject_t/hobject_t 唯一标识。
ObjectStore 的操作支持创建、修改、删除,以及罗列 collection 中的 objects。

但是这里的罗列是根据object key来进行排列的。所有的object name在整个Ceph系统里面
都是唯一的。

每个object都包含四个相互独立的部分:

  • 数据
  • xattrs
  • omap_header
  • omap_entries

关于Omap可以看一下这个链接:http://bean-li.github.io/ceph-omap/

简单地概述一下就是。

FileStore的omap中存放的都是对象的属性信息,以key-value的形式存在,那么对于不同的属性,如何定义对象的键值key呢?
最直接的想法就是(object_id + xattr_key),两者结合一起,形成对象的键值key,但是这种方法有一个问题,object_id可能会很长,尤其是当单个对象存在很多属性的时候,object_id不得不在key值中出现多次,这必然会造成存储空间的浪费。
Ceph的FileStore分成了2步:
第一步: 根据object_id生成一个比较短的seq,然后把这个seq存放到omap_header中。
第二步: 然后seq + xattr_key形成对象的某个属性的键值。

如何生成seq

如果是用LevelDB来实现Omap的话,那么就在LevelDB中存储一个OSD级别的全局state,其中记录当前分配到的seq。

key: SYS_PREFIX + GLOBAL_STATE_KEY
value: state

要申请seq的时候,针对这个seq上锁然后递增。seq是放在state里面的。state的内容就存放到LevelDB中。

struct State {
__u8 v;
uint64_t seq;
};

object_id到seq

struct _Header {
uint64_t seq;
uint64_t parent;
uint64_t num_children;
coll_t c;
ghobject_t oid;
SequencerPosition spos;
};

当生成seq之后,立即生成一个header结构。然后把这个header存放到LevelDB中。

key: HOBJECT_TO_SEQ + ghobject_key(oid)
value: header

Object的data

object的数据部分在概念上等价于文件系统里的一个文件,必须支持对object的随机读写和部分读写。对数据部分做稀疏(sparse)处理并不是一个强需求。一般而言,单个object不宜太大,通常在100MB左右以内。

Object的xattrs

xattrs主要是存放在文件系统的attrs上。而omap一般则是存放在leveldb上。

/*********************************
* 事务
*
* 一个事务包含了一系列修改操作。
*
* 一个事务的三个事件会导致回调。任何一个事务都会带如下的
* 回调函数。
*
* on_applied_sync, on_applied, and on_commit.
*
* `on_applied`和`on_applied_sync`这两个回调都是在修改正式生效之后才会被触发。所谓的修改生效就是指修改被后面的操作可见。
*
* `on_applied`和`on_applied_sync`之间唯一的差异,在于callback被调用时所处的线程和锁环境。`on_applied_sync`由执行事务的线程直接调用,因此回调必须尽快返回,并且不能去获取那些可能导致等待(wait)的锁;
* 相反,`on_applied`由另外一个Finisher线程来调用,这就意味着回调里可以去获取可能需要等待的锁。
* 需要注意的是:on_applied和on_applied_sync有时候也会被叫做on_readable和on_readable_sync。
*
* on_commit回调则肯定是由另外一个Finisher线程来调用的。并且所有的修改操作已经写到journal上。也就是持久化了。
*
* 就journal写日志的实现而言,每次原始的修改(包含相关的数据)都会被串行化到一个单一的buffer里面。这个串行化并不会拷贝任何数据本身,而是直接引用原有的数据。这样一来,就要求原有的数据保持不变,直到on_commit回调完成为止。在实践上,bufferlist通过bufferlist::raw_static引用相应的data缓冲区来处理这种情况。
*
* 一些ObjectStore的实现会选择实现自己形式的journal,并利用这种串行化形式来持久化事务。在这种情况下就需要保证encode/decode逻辑合理地处理好version,并且要处理好升级。
*
*
* TRANSACTION ISOLATION 事务独立性
*
* 事务的隔离性(isolation)是由调用方来保证的。具体来说,
* 当object的四个组成部分中的任何一个被某个事务修改/删除时,调用方保证在该事务还处于pending状态期间(即从事务发出开始,直到收到`on_applied_sync`回调为止)不去读取这个object的相应元素。
* 对于这个规则的违反并不会被ObjectStore所监管到。并且也不会有相应的错误被raise出来。
* 简单地说就是事务与事务之间是相互独立的,事务之间的关系,需要调用者来加以保证。
* Except as noted above, isolation is the responsibility of the
* caller. In other words, if any storage element (storage element
* == any of the four portions of an object as described above) is
* altered by a transaction (including deletion), the caller
* promises not to attempt to read that element while the
* transaction is pending (here pending means from the time of
* issuance until the "on_applied_sync" callback has been
* received). Violations of isolation need not be detected by
* ObjectStore and there is no corresponding error mechanism for
* reporting an isolation violation (crashing would be the
* appropriate way to report an isolation violation if detected).
*
* Enumeration operations may violate transaction isolation as
* described above when a storage element is being created or
* deleted as part of a transaction. In this case, ObjectStore is
* allowed to consider the enumeration operation to either precede
* or follow the violating transaction element. In other words, the
* presence/absence of the mutated element in the enumeration is
* entirely at the discretion of ObjectStore. The arbitrary ordering
* applies independently to each transaction element. For example,
* if a transaction contains two mutating elements "create A" and
* "delete B". And an enumeration operation is performed while this
* transaction is pending. It is permissable for ObjectStore to
* report any of the four possible combinations of the existence of
* A and B.
*
*/

事务

class Transaction {
public:
// 这里有点类似于设计了一套指令。
enum {
OP_NOP = 0,
OP_TOUCH = 9, // cid, oid
OP_WRITE = 10, // cid, oid, offset, len, bl
OP_ZERO = 11, // cid, oid, offset, len
OP_TRUNCATE = 12, // cid, oid, len
OP_REMOVE = 13, // cid, oid
OP_SETATTR = 14, // cid, oid, attrname, bl
OP_SETATTRS = 15, // cid, oid, attrset
OP_RMATTR = 16, // cid, oid, attrname
OP_CLONE = 17, // cid, oid, newoid
OP_CLONERANGE = 18, // cid, oid, newoid, offset, len
OP_CLONERANGE2 = 30, // cid, oid, newoid, srcoff, len, dstoff
OP_TRIMCACHE = 19, // cid, oid, offset, len **DEPRECATED**
OP_MKCOLL = 20, // cid
OP_RMCOLL = 21, // cid
OP_COLL_ADD = 22, // cid, oldcid, oid
OP_COLL_REMOVE = 23, // cid, oid
OP_COLL_SETATTR = 24, // cid, attrname, bl
OP_COLL_RMATTR = 25, // cid, attrname
OP_COLL_SETATTRS = 26, // cid, attrset
OP_COLL_MOVE = 8, // newcid, oldcid, oid
OP_RMATTRS = 28, // cid, oid
OP_COLL_RENAME = 29, // cid, newcid
OP_OMAP_CLEAR = 31, // cid
OP_OMAP_SETKEYS = 32, // cid, attrset
OP_OMAP_RMKEYS = 33, // cid, keyset
OP_OMAP_SETHEADER = 34, // cid, header
OP_SPLIT_COLLECTION = 35, // cid, bits, destination
OP_SPLIT_COLLECTION2 = 36, /* cid, bits, destination
doesn't create the destination */
OP_OMAP_RMKEYRANGE = 37, // cid, oid, firstkey, lastkey
OP_COLL_MOVE_RENAME = 38, // oldcid, oldoid, newcid, newoid
OP_SETALLOCHINT = 39, // cid, oid, object_size, write_size
OP_COLL_HINT = 40, // cid, type, bl
OP_TRY_RENAME = 41, // oldcid, oldoid, newoid
OP_COLL_SET_BITS = 42, // cid, bits
};
// Transaction hint type
enum {
COLL_HINT_EXPECTED_NUM_OBJECTS = 1,
};
// 真正的操作
struct Op {
__le32 op; // 这里用数字来表示操作的类型,也可以看做是指令的类型
__le32 cid;
__le32 oid;
__le64 off;
__le64 len;
__le32 dest_cid;
__le32 dest_oid; //OP_CLONE, OP_CLONERANGE
__le64 dest_off; //OP_CLONERANGE
union {
struct {
__le32 hint_type; //OP_COLL_HINT
};
struct {
__le32 alloc_hint_flags; //OP_SETALLOCHINT
};
};
__le64 expected_object_size; //OP_SETALLOCHINT
__le64 expected_write_size; //OP_SETALLOCHINT
__le32 split_bits; //OP_SPLIT_COLLECTION2,OP_COLL_SET_BITS,
//OP_MKCOLL
__le32 split_rem; //OP_SPLIT_COLLECTION2
} __attribute__ ((packed)) ;
//
struct TransactionData {
__le64 ops; // 这个应该是指的操作的数量
__le32 largest_data_len;
__le32 largest_data_off;
__le32 largest_data_off_in_data_bl;
__le32 fadvise_flags;
} __attribute__ ((packed)) ;
private:
TransactionData data;
map<coll_t, __le32> coll_index;
map<ghobject_t, __le32> object_index;
__le32 coll_id {0};
__le32 object_id {0};
bufferlist data_bl;
bufferlist op_bl;
bufferptr op_ptr;
list<Context *> on_applied;
list<Context *> on_commit;
list<Context *> on_applied_sync;
public:
void _update_op(Op* op,
vector<__le32> &cm,
vector<__le32> &om) {
// 根据情况来决定是否需要更新collection id
// 或者是object id
// 根据op的类型来决定
op->cid = cm[op->cid];
op->oid = om[op->oid];
op->dest_oid = om[op->dest_oid];
}
// bl里面是一个list
// list里面的每个元素都是一个Op结构
// 然后再通过_update_op(op_memory, cm, om)
// 来进行更新
void _update_op_bl(
bufferlist& bl,
vector<__le32> &cm,
vector<__le32> &om)
{
list<bufferptr> list = bl.buffers();
std::list<bufferptr>::iterator p;
for(p = list.begin(); p != list.end(); ++p) {
assert(p->length() % sizeof(Op) == 0);
char* raw_p = p->c_str();
char* raw_end = raw_p + p->length();
while (raw_p < raw_end) {
_update_op(reinterpret_cast<Op*>(raw_p), cm, om);
raw_p += sizeof(Op);
}
}
}
/// Append the operations of the parameter to this Transaction.
// Those operations are removed from the parameter Transaction
// 这里更加类似于两个事务的合并,注意:
// other.op_bl是深度复制了的。
// other.data_bl则是没有深度复制
// 可能是觉得other还会在别的地方会有用处
void append(Transaction& other) {
data.ops += other.data.ops;
if (other.data.largest_data_len > data.largest_data_len) {
data.largest_data_len = other.data.largest_data_len;
data.largest_data_off = other.data.largest_data_off;
data.largest_data_off_in_data_bl = data_bl.length() + other.data.largest_data_off_in_data_bl;
}
data.fadvise_flags |= other.data.fadvise_flags;
// splice的含义是把另外一个list放到on_applied/on_commit后面
// splice函数是说
// splice(Iterator position, list<T> l);
// 把l插入到postion位置。然后l里面的元素被move过去。所以
// 操作之后l变成空的了。
on_applied.splice(on_applied.end(), other.on_applied);
on_commit.splice(on_commit.end(), other.on_commit);
on_applied_sync.splice(on_applied_sync.end(), other.on_applied_sync);
//append coll_index & object_index
// cm新生成,后面用来更新
vector<__le32> cm(other.coll_index.size());
map<coll_t, __le32>::iterator coll_index_p;
for (coll_index_p = other.coll_index.begin();
coll_index_p != other.coll_index.end();
++coll_index_p) {
// 这里更新cm这个vector
cm[coll_index_p->second] = _get_coll_id(coll_index_p->first);
}
vector<__le32> om(other.object_index.size());
map<ghobject_t, __le32>::iterator object_index_p;
for (object_index_p = other.object_index.begin();
object_index_p != other.object_index.end();
++object_index_p) {
// 这里更新的是om这个vector
om[object_index_p->second] = _get_object_id(object_index_p->first);
}
// other.op_bl在这里是不能被更改的
//the other.op_bl SHOULD NOT be changes during append operation,
// 这里使用了另外一个bufferlist来处理这种case.
//we use additional bufferlist to avoid this problem
// 申请一个新的内存,长度为other.op_bl.length()
bufferptr other_op_bl_ptr(other.op_bl.length());
// 这里把other.op_bl里面的内容复制到新申请的内存里
other.op_bl.copy(0, other.op_bl.length(), other_op_bl_ptr.c_str());
bufferlist other_op_bl;
// 注意这里是一个list<bufferptr>, 所以这里用append把前面的内存缓冲区放进去
other_op_bl.append(other_op_bl_ptr);
//update other_op_bl with cm & om
//When the other is appended to current transaction, all coll_index and
//object_index in other.op_buffer should be updated by new index of the
//combined transaction
// 然后利用list<buffer>把当前的transaction更新一把
_update_op_bl(other_op_bl, cm, om);
//append op_bl
// 把other的op_bl list append到op_bl里面
// 完成两个事务的op的合并
op_bl.append(other_op_bl);
//append data_bl
// data bl也是需要合并
data_bl.append(other.data_bl);
}
/** Inquires about the Transaction as a whole. */
/// How big is the encoded Transaction buffer?
// 得到整个事务的长度
// 感觉这里不应该老是去计算
// 最好是有办法去优化
uint64_t get_encoded_bytes() {
//layout: data_bl + op_bl + coll_index + object_index + data
// coll_index size, object_index size and sizeof(transaction_data)
// all here, so they may be computed at compile-time
size_t final_size = sizeof(__u32) * 2 + sizeof(data);
// coll_index second and object_index second
final_size += (coll_index.size() + object_index.size()) * sizeof(__le32);
// coll_index first
for (auto p = coll_index.begin(); p != coll_index.end(); ++p) {
final_size += p->first.encoded_size();
}
// object_index first
for (auto p = object_index.begin(); p != object_index.end(); ++p) {
final_size += p->first.encoded_size();
}
return data_bl.length() +
op_bl.length() +
final_size;
}
uint64_t get_num_bytes() {
return get_encoded_bytes();
}
/// Size of largest data buffer to the "write" operation encountered so far
uint32_t get_data_length() {
return data.largest_data_len;
}
/// offset within the encoded buffer to the start of the largest data buffer that's encoded
uint32_t get_data_offset()
{
if (data.largest_data_off_in_data_bl) {
return data.largest_data_off_in_data_bl +
sizeof(__u8) + // encode struct_v
sizeof(__u8) + // encode compat_v
sizeof(__u32) + // encode len
sizeof(__u32); // data_bl len
}
return 0; // none
}
/// offset of buffer as aligned to destination within object.
int get_data_alignment()
{
if (!data.largest_data_len)
return 0;
return (0 - get_data_offset()) & ~CEPH_PAGE_MASK;
}
/// Is the Transaction empty (no operations)
bool empty()
{
// data里面的ops就是用来计数ops操作的数目
return !data.ops;
}
/// Number of operations in the transaction
int get_num_ops()
{
return data.ops;
}
/**
* iterator
*
* Helper object to parse Transactions.
*
* ObjectStore instances use this object to step down the encoded
* buffer decoding operation codes and parameters as we go.
*
*/
class iterator
{
Transaction *t;
uint64_t ops;
char* op_buffer_p;
bufferlist::const_iterator data_bl_p;
public:
vector<coll_t> colls;
vector<ghobject_t> objects;
private:
explicit iterator(Transaction *t)
: t(t),
data_bl_p(t->data_bl.cbegin()),
colls(t->coll_index.size()),
objects(t->object_index.size())
{
ops = t->data.ops;
op_buffer_p = t->op_bl.get_contiguous(0, t->data.ops * sizeof(Op));
map<coll_t, __le32>::iterator coll_index_p;
for (coll_index_p = t->coll_index.begin();
coll_index_p != t->coll_index.end();
++coll_index_p) {
colls[coll_index_p->second] = coll_index_p->first;
}
map<ghobject_t, __le32>::iterator object_index_p;
for (object_index_p = t->object_index.begin();
object_index_p != t->object_index.end();
++object_index_p) {
objects[object_index_p->second] = object_index_p->first;
}
}
friend class Transaction;
public:
bool have_op()
{
return ops > 0;
}
Op* decode_op()
{
assert(ops > 0);
Op* op = reinterpret_cast<Op*>(op_buffer_p);
op_buffer_p += sizeof(Op);
ops--;
return op;
}
string decode_string()
{
using ceph::decode;
string s;
decode(s, data_bl_p);
return s;
}
void decode_bp(bufferptr& bp)
{
using ceph::decode;
decode(bp, data_bl_p);
}
void decode_bl(bufferlist& bl)
{
using ceph::decode;
decode(bl, data_bl_p);
}
void decode_attrset(map<string,bufferptr>& aset)
{
using ceph::decode;
decode(aset, data_bl_p);
}
void decode_attrset(map<string,bufferlist>& aset)
{
using ceph::decode;
decode(aset, data_bl_p);
}
void decode_attrset_bl(bufferlist *pbl)
{
decode_str_str_map_to_bl(data_bl_p, pbl);
}
void decode_keyset(set<string> &keys)
{
using ceph::decode;
decode(keys, data_bl_p);
}
void decode_keyset_bl(bufferlist *pbl)
{
decode_str_set_to_bl(data_bl_p, pbl);
}
const ghobject_t &get_oid(__le32 oid_id)
{
assert(oid_id < objects.size());
return objects[oid_id];
}
const coll_t &get_cid(__le32 cid_id)
{
assert(cid_id < colls.size());
return colls[cid_id];
}
uint32_t get_fadvise_flags() const
{
return t->get_fadvise_flags();
}
};
iterator begin()
{
return iterator(this);
}
private:
void _build_actions_from_tbl();
/**
* Helper functions to encode the various mutation elements of a
* transaction. These are 1:1 with the operation codes (see
* enumeration above). These routines ensure that the
* encoder/creator of a transaction gets the right data in the
* right place. Sadly, there's no corresponding version nor any
* form of seat belts for the decoder.
*/
Op* _get_next_op()
{
if (op_ptr.length() == 0 || op_ptr.offset() >= op_ptr.length()) {
op_ptr = bufferptr(sizeof(Op) * OPS_PER_PTR);
}
bufferptr ptr(op_ptr, 0, sizeof(Op));
op_bl.append(ptr);
op_ptr.set_offset(op_ptr.offset() + sizeof(Op));
char* p = ptr.c_str();
memset(p, 0, sizeof(Op));
return reinterpret_cast<Op*>(p);
}
__le32 _get_coll_id(const coll_t& coll)
{
map<coll_t, __le32>::iterator c = coll_index.find(coll);
if (c != coll_index.end())
return c->second;
__le32 index_id = coll_id++;
coll_index[coll] = index_id;
return index_id;
}
__le32 _get_object_id(const ghobject_t& oid)
{
map<ghobject_t, __le32>::iterator o = object_index.find(oid);
if (o != object_index.end())
return o->second;
__le32 index_id = object_id++;
object_index[oid] = index_id;
return index_id;
}
public:
// 接下来这里生成各种事务的参数,指令
/// noop. 'nuf said
void nop()
{
Op* _op = _get_next_op();
_op->op = OP_NOP;
data.ops++;
}
/**
* touch
*
* Ensure the existance of an object in a collection. Create an
* empty object if necessary
*/
void touch(const coll_t& cid, const ghobject_t& oid)
{
Op* _op = _get_next_op();
_op->op = OP_TOUCH;
_op->cid = _get_coll_id(cid);
_op->oid = _get_object_id(oid);
data.ops++;
}
/**
* Write data to an offset within an object. If the object is too
* small, it is expanded as needed. It is possible to specify an
* offset beyond the current end of an object and it will be
* expanded as needed. Simple implementations of ObjectStore will
* just zero the data between the old end of the object and the
* newly provided data. More sophisticated implementations of
* ObjectStore will omit the untouched data and store it as a
* "hole" in the file.
*
* Note that a 0-length write does not affect the size of the object.
*/
void write(const coll_t& cid, const ghobject_t& oid, uint64_t off, uint64_t len,
const bufferlist& write_data, uint32_t flags = 0)
{
using ceph::encode;
uint32_t orig_len = data_bl.length();
Op* _op = _get_next_op();
_op->op = OP_WRITE;
_op->cid = _get_coll_id(cid);
_op->oid = _get_object_id(oid);
_op->off = off;
_op->len = len;
encode(write_data, data_bl);
assert(len == write_data.length());
data.fadvise_flags = data.fadvise_flags | flags;
if (write_data.length() > data.largest_data_len) {
data.largest_data_len = write_data.length();
data.largest_data_off = off;
data.largest_data_off_in_data_bl = orig_len + sizeof(__u32); // we are about to
}
data.ops++;
}
/**
* zero out the indicated byte range within an object. Some
* ObjectStore instances may optimize this to release the
* underlying storage space.
*
* If the zero range extends beyond the end of the object, the object
* size is extended, just as if we were writing a buffer full of zeros.
* EXCEPT if the length is 0, in which case (just like a 0-length write)
* we do not adjust the object size.
*/
void zero(const coll_t& cid, const ghobject_t& oid, uint64_t off, uint64_t len)
{
Op* _op = _get_next_op();
_op->op = OP_ZERO;
_op->cid = _get_coll_id(cid);
_op->oid = _get_object_id(oid);
_op->off = off;
_op->len = len;
data.ops++;
}
/// Discard all data in the object beyond the specified size.
void truncate(const coll_t& cid, const ghobject_t& oid, uint64_t off)
{
Op* _op = _get_next_op();
_op->op = OP_TRUNCATE;
_op->cid = _get_coll_id(cid);
_op->oid = _get_object_id(oid);
_op->off = off;
data.ops++;
}
/// Remove an object. All four parts of the object are removed.
void remove(const coll_t& cid, const ghobject_t& oid)
{
Op* _op = _get_next_op();
_op->op = OP_REMOVE;
_op->cid = _get_coll_id(cid);
_op->oid = _get_object_id(oid);
data.ops++;
}
/// Set an xattr of an object
void setattr(const coll_t& cid, const ghobject_t& oid, const char* name, bufferlist& val)
{
string n(name);
setattr(cid, oid, n, val);
}
/// Set an xattr of an object
void setattr(const coll_t& cid, const ghobject_t& oid, const string& s, bufferlist& val)
{
using ceph::encode;
Op* _op = _get_next_op();
_op->op = OP_SETATTR;
_op->cid = _get_coll_id(cid);
_op->oid = _get_object_id(oid);
encode(s, data_bl);
encode(val, data_bl);
data.ops++;
}
/// Set multiple xattrs of an object
void setattrs(const coll_t& cid, const ghobject_t& oid, const map<string,bufferptr>& attrset)
{
using ceph::encode;
Op* _op = _get_next_op();
_op->op = OP_SETATTRS;
_op->cid = _get_coll_id(cid);
_op->oid = _get_object_id(oid);
encode(attrset, data_bl);
data.ops++;
}
/// Set multiple xattrs of an object
void setattrs(const coll_t& cid, const ghobject_t& oid, const map<string,bufferlist>& attrset)
{
using ceph::encode;
Op* _op = _get_next_op();
_op->op = OP_SETATTRS;
_op->cid = _get_coll_id(cid);
_op->oid = _get_object_id(oid);
encode(attrset, data_bl);
data.ops++;
}
/// remove an xattr from an object
void rmattr(const coll_t& cid, const ghobject_t& oid, const char *name)
{
string n(name);
rmattr(cid, oid, n);
}
/// remove an xattr from an object
void rmattr(const coll_t& cid, const ghobject_t& oid, const string& s)
{
using ceph::encode;
Op* _op = _get_next_op();
_op->op = OP_RMATTR;
_op->cid = _get_coll_id(cid);
_op->oid = _get_object_id(oid);
encode(s, data_bl);
data.ops++;
}
/// remove all xattrs from an object
void rmattrs(const coll_t& cid, const ghobject_t& oid)
{
Op* _op = _get_next_op();
_op->op = OP_RMATTRS;
_op->cid = _get_coll_id(cid);
_op->oid = _get_object_id(oid);
data.ops++;
}
/**
* Clone an object into another object.
*
* Low-cost (e.g., O(1)) cloning (if supported) is best, but
* fallback to an O(n) copy is allowed. All four parts of the
* object are cloned (data, xattrs, omap header, omap
* entries).
*
* The destination named object may already exist, in
* which case its previous contents are discarded.
*/
void clone(const coll_t& cid, const ghobject_t& oid,
const ghobject_t& noid)
{
Op* _op = _get_next_op();
_op->op = OP_CLONE;
_op->cid = _get_coll_id(cid);
_op->oid = _get_object_id(oid);
_op->dest_oid = _get_object_id(noid);
data.ops++;
}
/**
* Clone a byte range from one object to another.
*
* The data portion of the destination object receives a copy of a
* portion of the data from the source object. None of the other
* three parts of an object is copied from the source.
*
* The destination object size may be extended to the dstoff + len.
*
* The source range *must* overlap with the source object data. If it does
* not the result is undefined.
*/
void clone_range(const coll_t& cid, const ghobject_t& oid,
const ghobject_t& noid,
uint64_t srcoff, uint64_t srclen, uint64_t dstoff)
{
Op* _op = _get_next_op();
_op->op = OP_CLONERANGE2;
_op->cid = _get_coll_id(cid);
_op->oid = _get_object_id(oid);
_op->dest_oid = _get_object_id(noid);
_op->off = srcoff;
_op->len = srclen;
_op->dest_off = dstoff;
data.ops++;
}
/// Create the collection
void create_collection(const coll_t& cid, int bits)
{
Op* _op = _get_next_op();
_op->op = OP_MKCOLL;
_op->cid = _get_coll_id(cid);
_op->split_bits = bits;
data.ops++;
}
/**
* Give the collection a hint.
*
* @param cid - collection id.
* @param type - hint type.
* @param hint - the hint payload, which contains the customized
* data along with the hint type.
*/
void collection_hint(const coll_t& cid, uint32_t type, const bufferlist& hint)
{
using ceph::encode;
Op* _op = _get_next_op();
_op->op = OP_COLL_HINT;
_op->cid = _get_coll_id(cid);
_op->hint_type = type;
encode(hint, data_bl);
data.ops++;
}
/// remove the collection, the collection must be empty
void remove_collection(const coll_t& cid)
{
Op* _op = _get_next_op();
_op->op = OP_RMCOLL;
_op->cid = _get_coll_id(cid);
data.ops++;
}
void collection_move(const coll_t& cid, const coll_t &oldcid, const ghobject_t& oid)
__attribute__ ((deprecated))
{
// NOTE: we encode this as a fixed combo of ADD + REMOVE. they
// always appear together, so this is effectively a single MOVE.
Op* _op = _get_next_op();
_op->op = OP_COLL_ADD;
_op->cid = _get_coll_id(oldcid);
_op->oid = _get_object_id(oid);
_op->dest_cid = _get_coll_id(cid);
data.ops++;
_op = _get_next_op();
_op->op = OP_COLL_REMOVE;
_op->cid = _get_coll_id(oldcid);
_op->oid = _get_object_id(oid);
data.ops++;
}
void collection_move_rename(const coll_t& oldcid, const ghobject_t& oldoid,
const coll_t &cid, const ghobject_t& oid)
{
Op* _op = _get_next_op();
_op->op = OP_COLL_MOVE_RENAME;
_op->cid = _get_coll_id(oldcid);
_op->oid = _get_object_id(oldoid);
_op->dest_cid = _get_coll_id(cid);
_op->dest_oid = _get_object_id(oid);
data.ops++;
}
void try_rename(const coll_t &cid, const ghobject_t& oldoid,
const ghobject_t& oid)
{
Op* _op = _get_next_op();
_op->op = OP_TRY_RENAME;
_op->cid = _get_coll_id(cid);
_op->oid = _get_object_id(oldoid);
_op->dest_oid = _get_object_id(oid);
data.ops++;
}
/// Remove omap from oid
void omap_clear(
const coll_t &cid, ///< [in] Collection containing oid
const ghobject_t &oid ///< [in] Object from which to remove omap
)
{
Op* _op = _get_next_op();
_op->op = OP_OMAP_CLEAR;
_op->cid = _get_coll_id(cid);
_op->oid = _get_object_id(oid);
data.ops++;
}
/// Set keys on oid omap. Replaces duplicate keys.
void omap_setkeys(
const coll_t& cid, ///< [in] Collection containing oid
const ghobject_t &oid, ///< [in] Object to update
const map<string, bufferlist> &attrset ///< [in] Replacement keys and values
)
{
using ceph::encode;
Op* _op = _get_next_op();
_op->op = OP_OMAP_SETKEYS;
_op->cid = _get_coll_id(cid);
_op->oid = _get_object_id(oid);
encode(attrset, data_bl);
data.ops++;
}
/// Set keys on an oid omap (bufferlist variant).
void omap_setkeys(
const coll_t &cid, ///< [in] Collection containing oid
const ghobject_t &oid, ///< [in] Object to update
const bufferlist &attrset_bl ///< [in] Replacement keys and values
)
{
Op* _op = _get_next_op();
_op->op = OP_OMAP_SETKEYS;
_op->cid = _get_coll_id(cid);
_op->oid = _get_object_id(oid);
data_bl.append(attrset_bl);
data.ops++;
}
/// Remove keys from oid omap
void omap_rmkeys(
const coll_t &cid, ///< [in] Collection containing oid
const ghobject_t &oid, ///< [in] Object from which to remove the omap
const set<string> &keys ///< [in] Keys to clear
)
{
using ceph::encode;
Op* _op = _get_next_op();
_op->op = OP_OMAP_RMKEYS;
_op->cid = _get_coll_id(cid);
_op->oid = _get_object_id(oid);
encode(keys, data_bl);
data.ops++;
}
/// Remove keys from oid omap
void omap_rmkeys(
const coll_t &cid, ///< [in] Collection containing oid
const ghobject_t &oid, ///< [in] Object from which to remove the omap
const bufferlist &keys_bl ///< [in] Keys to clear
)
{
Op* _op = _get_next_op();
_op->op = OP_OMAP_RMKEYS;
_op->cid = _get_coll_id(cid);
_op->oid = _get_object_id(oid);
data_bl.append(keys_bl);
data.ops++;
}
/// Remove key range from oid omap
void omap_rmkeyrange(
const coll_t &cid, ///< [in] Collection containing oid
const ghobject_t &oid, ///< [in] Object from which to remove the omap keys
const string& first, ///< [in] first key in range
const string& last ///< [in] first key past range, range is [first,last)
)
{
using ceph::encode;
Op* _op = _get_next_op();
_op->op = OP_OMAP_RMKEYRANGE;
_op->cid = _get_coll_id(cid);
_op->oid = _get_object_id(oid);
encode(first, data_bl);
encode(last, data_bl);
data.ops++;
}
/// Set omap header
void omap_setheader(
const coll_t &cid, ///< [in] Collection containing oid
const ghobject_t &oid, ///< [in] Object
const bufferlist &bl ///< [in] Header value
)
{
using ceph::encode;
Op* _op = _get_next_op();
_op->op = OP_OMAP_SETHEADER;
_op->cid = _get_coll_id(cid);
_op->oid = _get_object_id(oid);
encode(bl, data_bl);
data.ops++;
}
/// Split collection based on given prefixes, objects matching the specified bits/rem are
/// moved to the new collection
void split_collection(
const coll_t &cid,
uint32_t bits,
uint32_t rem,
const coll_t &destination)
{
Op* _op = _get_next_op();
_op->op = OP_SPLIT_COLLECTION2;
_op->cid = _get_coll_id(cid);
_op->dest_cid = _get_coll_id(destination);
_op->split_bits = bits;
_op->split_rem = rem;
data.ops++;
}
void collection_set_bits(
const coll_t &cid,
int bits)
{
Op* _op = _get_next_op();
_op->op = OP_COLL_SET_BITS;
_op->cid = _get_coll_id(cid);
_op->split_bits = bits;
data.ops++;
}
/// Set allocation hint for an object
/// make 0 values(expected_object_size, expected_write_size) noops for all implementations
void set_alloc_hint(
const coll_t &cid,
const ghobject_t &oid,
uint64_t expected_object_size,
uint64_t expected_write_size,
uint32_t flags
)
{
Op* _op = _get_next_op();
_op->op = OP_SETALLOCHINT;
_op->cid = _get_coll_id(cid);
_op->oid = _get_object_id(oid);
_op->expected_object_size = expected_object_size;
_op->expected_write_size = expected_write_size;
_op->alloc_hint_flags = flags;
data.ops++;
}
};
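
把上面这些操作串起来,构造一个事务的典型写法大致如下(示意,cid/oid/bl/attr_bl/kv/ch/store 假设已在上下文中定义,入队接口见下面的"事务入队"):

ObjectStore::Transaction t;
t.create_collection(cid, 0); // OP_MKCOLL
t.touch(cid, oid); // OP_TOUCH
t.write(cid, oid, 0, bl.length(), bl); // OP_WRITE,数据编码进 data_bl
t.setattr(cid, oid, "_", attr_bl); // OP_SETATTR
t.omap_setkeys(cid, oid, kv); // OP_OMAP_SETKEYS,kv 为 map<string,bufferlist>
int r = store->queue_transaction(ch, std::move(t));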

事务入队

int queue_transaction(CollectionHandle& ch,
Transaction&& t,
TrackedOpRef op = TrackedOpRef(),
ThreadPool::TPHandle *handle = NULL)
{
vector<Transaction> tls;
tls.push_back(std::move(t));
return queue_transactions(ch, tls, op, handle);
}
virtual int queue_transactions(
CollectionHandle& ch, vector<Transaction>& tls,
TrackedOpRef op = TrackedOpRef(),
ThreadPool::TPHandle *handle = NULL) = 0;
public:
// versioning
virtual int upgrade() {
return 0;
}
virtual void get_db_statistics(Formatter *f) { }
virtual void generate_db_histogram(Formatter *f) { }
virtual void flush_cache() { }
virtual void dump_perf_counters(Formatter *f) {}
virtual string get_type() = 0;
// mgmt
virtual bool test_mount_in_use() = 0;
virtual int mount() = 0;
virtual int umount() = 0;
virtual int fsck(bool deep)
{
return -EOPNOTSUPP;
}
virtual int repair(bool deep)
{
return -EOPNOTSUPP;
}
virtual void set_cache_shards(unsigned num) { }
/**
* Returns 0 if the hobject is valid, -error otherwise
*
* Errors:
* -ENAMETOOLONG: locator/namespace/name too large
*/
virtual int validate_hobject_key(const hobject_t &obj) const = 0;
virtual unsigned get_max_attr_name_length() = 0;
virtual int mkfs() = 0; // wipe
virtual int mkjournal() = 0; // journal only
virtual bool needs_journal() = 0; //< requires a journal
virtual bool wants_journal() = 0; //< prefers a journal
virtual bool allows_journal() = 0; //< allows a journal
/// enumerate hardware devices (by 'devname', e.g., 'sda' as in /sys/block/sda)
virtual int get_devices(std::set<string> *devls)
{
return -EOPNOTSUPP;
}
/// true if a txn is readable immediately after it is queued.
virtual bool is_sync_onreadable() const
{
return true;
}
/**
* is_rotational
*
* Check whether store is backed by a rotational (HDD) or non-rotational
* (SSD) device.
*
* This must be usable *before* the store is mounted.
*
* @return true for HDD, false for SSD
*/
virtual bool is_rotational()
{
return true;
}
/**
* is_journal_rotational
*
* Check whether journal is backed by a rotational (HDD) or non-rotational
* (SSD) device.
*
*
* @return true for HDD, false for SSD
*/
virtual bool is_journal_rotational()
{
return true;
}
virtual string get_default_device_class()
{
return is_rotational() ? "hdd" : "ssd";
}
virtual bool can_sort_nibblewise()
{
return false; // assume a backend cannot, unless it says otherwise
}
virtual int statfs(struct store_statfs_t *buf) = 0;
virtual void collect_metadata(map<string,string> *pm) { }
/**
* write_meta - write a simple configuration key out-of-band
*
* Write a simple key/value pair for basic store configuration
* (e.g., a uuid or magic number) to an unopened/unmounted store.
* The default implementation writes this to a plaintext file in the
* path.
*
* A newline is appended.
*
* @param key key name (e.g., "fsid")
* @param value value (e.g., a uuid rendered as a string)
* @returns 0 for success, or an error code
*/
virtual int write_meta(const std::string& key,
const std::string& value);
/**
* read_meta - read a simple configuration key out-of-band
*
* Read a simple key value to an unopened/mounted store.
*
* Trailing whitespace is stripped off.
*
* @param key key name
* @param value pointer to value string
* @returns 0 for success, or an error code
*/
virtual int read_meta(const std::string& key,
std::string *value);
/**
* get ideal max value for collection_list()
*
* default to some arbitrary values; the implementation will override.
*/
virtual int get_ideal_list_max()
{
return 64;
}
/**
* get a collection handle
*
* Provide a trivial handle as a default to avoid converting legacy
* implementations.
*/
virtual CollectionHandle open_collection(const coll_t &cid) = 0;
/**
* get a collection handle for a soon-to-be-created collection
*
* This handle must be used by queue_transaction that includes a
* create_collection call in order to become valid. It will become the
* reference to the created collection.
*/
virtual CollectionHandle create_new_collection(const coll_t &cid) = 0;
/**
* Synchronous read operations
*/
/**
* exists -- Test for existance of object
*
* @param cid collection for object
* @param oid oid of object
* @returns true if object exists, false otherwise
*/
virtual bool exists(CollectionHandle& c, const ghobject_t& oid) = 0;
/**
* set_collection_opts -- set pool options for a collection
*
* @param cid collection
* @param opts new collection options
* @returns 0 on success, negative error code on failure.
*/
virtual int set_collection_opts(
CollectionHandle& c,
const pool_opts_t& opts) = 0;
/**
* stat -- get information for an object
*
* @param cid collection for object
* @param oid oid of object
* @param st output information for the object
* @param allow_eio if false, assert on -EIO operation failure
* @returns 0 on success, negative error code on failure.
*/
virtual int stat(
CollectionHandle &c,
const ghobject_t& oid,
struct stat *st,
bool allow_eio = false) = 0;
/**
* read -- read a byte range of data from an object
*
* Note: if reading from an offset past the end of the object, we
* return 0 (not, say, -EINVAL).
*
* @param cid collection for object
* @param oid oid of object
* @param offset location offset of first byte to be read
* @param len number of bytes to be read
* @param bl output bufferlist
* @param op_flags is CEPH_OSD_OP_FLAG_*
* @returns number of bytes read on success, or negative error code on failure.
*/
virtual int read(
CollectionHandle &c,
const ghobject_t& oid,
uint64_t offset,
size_t len,
bufferlist& bl,
uint32_t op_flags = 0) = 0;
/**
* fiemap -- get extent map of data of an object
*
* Returns an encoded map of the extents of an object's data portion
* (map<offset,size>).
*
* A non-enlightened implementation is free to return the extent (offset, len)
* as the sole extent.
*
* @param cid collection for object
* @param oid oid of object
* @param offset location offset of first byte to be read
* @param len number of bytes to be read
* @param bl output bufferlist for extent map information.
* @returns 0 on success, negative error code on failure.
*/
virtual int fiemap(CollectionHandle& c, const ghobject_t& oid,
uint64_t offset, size_t len, bufferlist& bl) = 0;
virtual int fiemap(CollectionHandle& c, const ghobject_t& oid,
uint64_t offset, size_t len, map<uint64_t, uint64_t>& destmap) = 0;
/**
* getattr -- get an xattr of an object
*
* @param cid collection for object
* @param oid oid of object
* @param name name of attr to read
* @param value place to put output result.
* @returns 0 on success, negative error code on failure.
*/
virtual int getattr(CollectionHandle &c, const ghobject_t& oid,
const char *name, bufferptr& value) = 0;
/**
* getattr -- get an xattr of an object
*
* @param cid collection for object
* @param oid oid of object
* @param name name of attr to read
* @param value place to put output result.
* @returns 0 on success, negative error code on failure.
*/
int getattr(
CollectionHandle &c, const ghobject_t& oid,
const string& name, bufferlist& value)
{
bufferptr bp;
int r = getattr(c, oid, name.c_str(), bp);
value.push_back(bp);
return r;
}
/**
* getattrs -- get all of the xattrs of an object
*
* @param cid collection for object
* @param oid oid of object
* @param aset place to put output result.
* @returns 0 on success, negative error code on failure.
*/
virtual int getattrs(CollectionHandle &c, const ghobject_t& oid,
map<string,bufferptr>& aset) = 0;
/**
* getattrs -- get all of the xattrs of an object
*
* @param cid collection for object
* @param oid oid of object
* @param aset place to put output result.
* @returns 0 on success, negative error code on failure.
*/
int getattrs(CollectionHandle &c, const ghobject_t& oid,
map<string,bufferlist>& aset)
{
map<string,bufferptr> bmap;
int r = getattrs(c, oid, bmap);
for (map<string,bufferptr>::iterator i = bmap.begin();
i != bmap.end();
++i) {
aset[i->first].append(i->second);
}
return r;
}
// collections
/**
* list_collections -- get all of the collections known to this ObjectStore
*
* @param ls list of the collections in sorted order.
* @returns 0 on success, negative error code on failure.
*/
virtual int list_collections(vector<coll_t>& ls) = 0;
/**
* does a collection exist?
*
* @param c collection
* @returns true if it exists, false otherwise
*/
virtual bool collection_exists(const coll_t& c) = 0;
/**
* is a collection empty?
*
* @param c collection
* @param empty true if the specified collection is empty, false otherwise
* @returns 0 on success, negative error code on failure.
*/
virtual int collection_empty(CollectionHandle& c, bool *empty) = 0;
/**
* return the number of significant bits of the coll_t::pgid.
*
* This should return what the last create_collection or split_collection
* set. A legacy backend may return -EAGAIN if the value is unavailable
* (because we upgraded from an older version, e.g., FileStore).
*/
virtual int collection_bits(CollectionHandle& c) = 0;
/**
* list contents of a collection that fall in the range [start, end), returning no more than a specified number of results
*
* @param c collection
* @param start list object that sort >= this value
* @param end list objects that sort < this value
* @param max return no more than this many results
* @param seq return no objects with snap < seq
* @param ls [out] result
* @param next [out] next item sorts >= this value
* @return zero on success, or negative error
*/
virtual int collection_list(CollectionHandle &c,
const ghobject_t& start, const ghobject_t& end,
int max,
vector<ghobject_t> *ls, ghobject_t *next) = 0;
/// OMAP
/// Get omap contents
virtual int omap_get(
CollectionHandle &c, ///< [in] Collection containing oid
const ghobject_t &oid, ///< [in] Object containing omap
bufferlist *header, ///< [out] omap header
map<string, bufferlist> *out /// < [out] Key to value map
) = 0;
/// Get omap header
virtual int omap_get_header(
CollectionHandle &c, ///< [in] Collection containing oid
const ghobject_t &oid, ///< [in] Object containing omap
bufferlist *header, ///< [out] omap header
bool allow_eio = false ///< [in] don't assert on eio
) = 0;
/// Get keys defined on oid
virtual int omap_get_keys(
CollectionHandle &c, ///< [in] Collection containing oid
const ghobject_t &oid, ///< [in] Object containing omap
set<string> *keys ///< [out] Keys defined on oid
) = 0;
/// Get key values
virtual int omap_get_values(
CollectionHandle &c, ///< [in] Collection containing oid
const ghobject_t &oid, ///< [in] Object containing omap
const set<string> &keys, ///< [in] Keys to get
map<string, bufferlist> *out ///< [out] Returned keys and values
) = 0;
/// Filters keys into out which are defined on oid
virtual int omap_check_keys(
CollectionHandle &c, ///< [in] Collection containing oid
const ghobject_t &oid, ///< [in] Object containing omap
const set<string> &keys, ///< [in] Keys to check
set<string> *out ///< [out] Subset of keys defined on oid
) = 0;
/**
* Returns an object map iterator
*
* Warning! The returned iterator is an implicit lock on filestore
* operations in c. Do not use filestore methods on c while the returned
* iterator is live. (Filling in a transaction is no problem).
*
* @return iterator, null on error
*/
virtual ObjectMap::ObjectMapIterator get_omap_iterator(
CollectionHandle &c, ///< [in] collection
const ghobject_t &oid ///< [in] object
) = 0;
virtual int flush_journal() {
return -EOPNOTSUPP;
}
virtual int dump_journal(ostream& out) {
return -EOPNOTSUPP;
}
virtual int snapshot(const string& name) {
return -EOPNOTSUPP;
}
/**
* Set and get internal fsid for this instance. No external data is modified
*/
virtual void set_fsid(uuid_d u) = 0;
virtual uuid_d get_fsid() = 0;
/**
* Estimates additional disk space used by the specified amount of objects and caused by file allocation granularity and metadata store
* - num objects - total (including witeouts) object count to measure used space for.
*/
virtual uint64_t estimate_objects_overhead(uint64_t num_objects) = 0;
virtual void compact() {}
virtual bool has_builtin_csum() const
{
return false;
}
};

1.10 type traits

/// integral_constant
template<typename _Tp, _Tp __v>
struct integral_constant {
static constexpr _Tp value = __v; // note: a static member
typedef _Tp value_type;
typedef integral_constant<_Tp, __v> type;
constexpr operator value_type() const { return value; }
constexpr value_type operator()() const { return value; }
};
// Out-of-class definition of the static data member. Up to C++14 this definition
// is still required whenever the member is odr-used; only since C++17 (where static
// constexpr members are implicitly inline) can it be omitted.
template<typename _Tp, _Tp __v>
constexpr _Tp integral_constant<_Tp, __v>::value;

Two different kinds of operator

#include <iostream>
class A {
public:
operator int() const {
std::cout << "value_type()" << std::endl;
return 100;
}
int operator()() const {
std::cout << "operator()()" << std::endl;
return 101;
}
};
int main(void) {
A a;
int t = a; // invokes operator int() const (implicit conversion)
a(); // invoked as a function object via operator()()
return 0;
}

So we must distinguish operator type() const from type operator()() const:
the former is an implicit conversion operator, the latter makes the object callable like a function (a functor).

What integral_constant is for

Literal constants as template parameters

In template programming, when a template parameter is not typename T but a non-type parameter such as int N or bool cond, the argument supplied for it must be a compile-time (literal) constant.

template<bool cond, typename T>
class A ...
// usage:
A<true, int> aInt;
A<false, int> bInt;

The following, however, does not compile:

bool cond = false;
A<cond, int> aInt; // compile error: cond is not a compile-time constant

A C++11 constexpr constant, of course, is fine:

constexpr bool cond = true;
A<cond, int> aInt; // OK

Summary
When a template is instantiated, the compiler's input is just the tokens in the source text; no runtime storage is involved. Template arguments can therefore only be types or compile-time constants.

In other words, template metaprogramming takes two kinds of input:

  • types, e.g. int, double, float, or user-defined types;
  • constants of some type, e.g. literals of built-in types such as false/true, or a constexpr value like constexpr bool cond = true (cond also counts as a compile-time constant).

How to represent typed constants generically

Given the two kinds of input above, types and constants of a type, a natural question follows: can the two be unified?

First, look at how a template taking a typed constant is usually written.

template<typename T, bool X>
SomeCode
// use
SomeCode<someType, true> x;

Here the type of the second parameter must be spelled out explicitly. To support constants of another type, say int, the same code has to be copied again:

template<typename T, int X>
SomeCode
// use
SomeCode<someType, 10> x;

That is not generic at all. The better approach is a single abstraction that wraps a typed compile-time constant, and that is exactly what integral_constant is.

First, this is how the two bool constants are defined:

typedef std::integral_constant<bool, true> true_type;
typedef std::integral_constant<bool, false> false_type;

With that, the consuming template only needs to be written once. To tell it apart from the earlier version, SomeCode is renamed SomeClass:

template<typename T, typename ConstValue>
class SomeClass {
// the type of the wrapped constant (e.g. bool, int)
typedef typename ConstValue::value_type const_type;
// the wrapped constant's value (must be static)
static constexpr const_type value = ConstValue::value;
};

The usage side, however, becomes a little more verbose.

// before: SomeCode<int, false> aInst;
// now:
typedef std::integral_constant<bool, true> true_type;
typedef std::integral_constant<bool, false> false_type;
SomeClass<int, false_type> aInst;
// before: SomeCode<int, 10> bInst;
// now:
SomeClass<int, std::integral_constant<int,10>> bInst;

Summary

  • On the implementation side, a typed compile-time constant is wrapped in one template whose ::type / ::value members expose the type and the value.
  • The consuming template (SomeClass) refers to the type and the value through ::type / ::value.
  • Client code that used to write SomeCode<int, 10> now writes SomeClass<int, std::integral_constant<int,10>> (see the runnable sketch below).
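
A minimal, self-contained sketch of the pattern (SomeClass here is illustrative, not a library type):

#include <iostream>
#include <type_traits>

// A consumer that accepts any integral_constant-like wrapper.
template<typename T, typename ConstValue>
struct SomeClass {
typedef typename ConstValue::value_type const_type;    // type of the constant
static constexpr const_type value = ConstValue::value; // value of the constant
};

int main() {
SomeClass<int, std::true_type> a;
SomeClass<int, std::integral_constant<int, 10>> b;
std::cout << a.value << " " << b.value << std::endl; // prints: 1 10
return 0;
}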

Defining a few standard constants

// The type used as a compile-time boolean with true value.
typedef integral_constant<bool, true> true_type;
// The type used as a compile-time boolean with false value.
typedef integral_constant<bool, false> false_type;
template<bool __v>
using __bool_constant = integral_constant<bool, __v>;
template<bool __v>
using bool_constant = integral_constant<bool, __v>;

if/else

// Primary template.
/// Define a member typedef @c type to one of two argument types.
template<bool _Cond, typename _Iftrue, typename _Iffalse>
struct conditional
{ typedef _Iftrue type; };
// Partial specialization for false.
template<typename _Iftrue, typename _Iffalse>
struct conditional<false, _Iftrue, _Iffalse>
{ typedef _Iffalse type; };

This is essentially the same as the hand-rolled if/else metafunction from earlier:

template<bool cond, typename If, typename Then>
struct IF {
typedef If type;
};
template<typename If, typename Then>
struct IF<false, If, Then> {
typedef Then type;
};

Apart from the names, the code is identical.
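
A quick compile-time check of std::conditional (a sketch; needs <type_traits>):

#include <type_traits>

// Pick a type at compile time: the condition selects the first or second type.
static_assert(std::is_same<std::conditional<true, int, double>::type, int>::value,
"conditional<true, ...> yields the first type");
static_assert(std::is_same<std::conditional<false, int, double>::type, double>::value,
"conditional<false, ...> yields the second type");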

Variadic template parameters

C++11 adds variadic template parameters; a typical use looks like this:

#include <iostream>
// forward declaration
template<typename... Args>
struct Sum;
// general case: peel off the first type and recurse on the rest
template<typename First, typename... Rest>
struct Sum<First, Rest...> {
static constexpr int value = Sum<First>::value + Sum<Rest...>::value;
};
// recursion terminator: a single type
template<typename Last>
struct Sum<Last> {
static constexpr int value = sizeof (Last);
};
int main(void) {
std::cout << Sum<float, int, double>::value << std::endl;
return 0;
}

An OR operation over types

With that small program in mind, the following template from the standard library is easy to read.

template<typename...>
struct __or_;
// with no arguments the result is false_type
template<>
struct __or_<>
: public false_type
{ };
// with a single argument B1, the result is B1 itself
template<typename _B1>
struct __or_<_B1>
: public _B1
{ };
// with two arguments, the first argument's value decides
// which of the two types is returned
template<typename _B1, typename _B2>
struct __or_<_B1, _B2>
: public conditional<_B1::value, _B1, _B2>::type
{ };
// the net effect is B1::value || B2::value || ... || Bn::value,
// evaluated left to right with short-circuiting
template<typename _B1, typename _B2, typename _B3, typename... _Bn>
struct __or_<_B1, _B2, _B3, _Bn...>
: public conditional<_B1::value, _B1, __or_<_B2, _B3, _Bn...>>::type
{ };

__or_ thus yields the first of the given types whose ::value is true (non-zero):

#include <iostream>
#include <type_traits>
int main(void) {
typedef std::integral_constant<int, 10> int_10_cont;
auto x = std::__or_<int_10_cont, std::true_type,std::false_type,std::true_type>::value;
std::cout << x << std::endl;
return 0;
}

Summary

The OR over types mirrors the C/C++ expression a || b: as soon as an operand is non-zero, that operand itself is the result.

An AND operation over types

Given if and OR, AND can also be built on conditional; the only subtlety is which branch goes where. For a || b the logic is:

if (A::value) {
return A::type;
} else {
return B::type;
}

For a && b the branches are swapped:

if (A::value) {
return B::type;
} else {
return A::type;
}

In template form this reads conditional<A::value, B, A>::type.

So a conceptually very simple operation on types ends up written as follows.

template<typename...>
struct __and_;
// note: with no arguments the result is true_type
template<>
struct __and_<>
: public true_type
{ };
// with a single argument, return that type directly
template<typename _B1>
struct __and_<_B1>
: public _B1
{ };
// the if-structure described above: if B1 is true, continue with B2, else stop at B1
template<typename _B1, typename _B2>
struct __and_<_B1, _B2>
: public conditional<_B1::value, _B2, _B1>::type
{ };
template<typename _B1, typename _B2, typename _B3, typename... _Bn>
struct __and_<_B1, _B2, _B3, _Bn...>
: public conditional<_B1::value, __and_<_B2, _B3, _Bn...>, _B1>::type
{ };

Note in particular that the last specialization behaves like the following pseudo-function:

type and(B1, B2, B3, ... Bn) {
if (B1::value) {
return and(B2, B3, Bn...);
} else {
return B1::type;
}
}

A NOT operation on types

A wrapped compile-time constant can also be negated:

template<typename _Pp>
struct __not_
: public integral_constant<bool, !_Pp::value>
{ };

Note how !_Pp::value maps the predicate directly onto one of

typedef std::integral_constant<bool, true> true_type;
typedef std::integral_constant<bool, false> false_type;

Conjunction, disjunction, negation

For a list of predicate types the library exposes conjunction (and), disjunction (or) and negation (not); negation, of course, applies to a single type only.

template<typename... _Bn>
struct conjunction
: __and_<_Bn...>
{ };
// note: every type in the pack _Bn... must expose a ::value member;
// the inline variable templates declared below make the results easier to spell
template<typename... _Bn>
struct disjunction
: __or_<_Bn...>
{ };
template<typename _Pp>
struct negation
: __not_<_Pp>
{ };

Since each operation yields an integral_constant-derived type, the result always has a ::value member. For convenience the following variable templates (C++17) are also defined.

template<typename... _Bn>
inline constexpr bool conjunction_v
= conjunction<_Bn...>::value;
template<typename... _Bn>
inline constexpr bool disjunction_v
= disjunction<_Bn...>::value;
template<typename _Pp>
inline constexpr bool negation_v
= negation<_Pp>::value;
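
A quick C++17 usage check (a sketch; needs <type_traits>):

#include <type_traits>

// conjunction/disjunction short-circuit over the ::value members.
static_assert(std::conjunction_v<std::true_type, std::true_type>, "all true");
static_assert(!std::conjunction_v<std::true_type, std::false_type>, "one operand is false");
static_assert(std::disjunction_v<std::false_type, std::true_type>, "at least one is true");
static_assert(std::negation_v<std::false_type>, "not false is true");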

Reporting success or failure

Sometimes a trait must report whether a computation succeeded at all. libstdc++ uses the following pair of types for that:

template<typename _Tp>
struct __success_type
{ typedef _Tp type; };
struct __failure_type
{ };

The point of these two types is best explained by the accompanying comment in the source:

// For several sfinae-friendly trait implementations we transport both the
// result information (as the member type) and the failure information (no
// member type). This is very similar to std::enable_if, but we cannot use
// them, because we need to derive from them as an implementation detail.
// In other words: a SFINAE-friendly trait either reports a result type (the member type of
// __success_type) or reports failure by having no member type at all (__failure_type). This is
// very close to std::enable_if, but enable_if cannot be used here because these helpers are
// derived from as an implementation detail.
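
A minimal sketch of how such a success/failure pair is typically used (success_type, failure_type and add_result here are illustrative names, not the libstdc++ internals):

#include <iostream>
#include <type_traits>
#include <utility>

// Illustrative stand-ins for libstdc++'s __success_type / __failure_type.
template<typename T> struct success_type { typedef T type; };
struct failure_type { };

// A SFINAE-friendly "what is the type of a + a?" trait:
// if U + U is well-formed, test(int) is chosen and reports success_type<result>;
// otherwise only test(...) survives and reports failure_type (no nested ::type).
template<typename T>
struct add_result {
template<typename U>
static success_type<decltype(std::declval<U>() + std::declval<U>())> test(int);
template<typename U>
static failure_type test(...);
typedef decltype(test<T>(0)) type;
};

struct NoAdd { };

static_assert(std::is_same<add_result<int>::type, success_type<int>>::value,
"int + int is well-formed and yields int");
static_assert(std::is_same<add_result<NoAdd>::type, failure_type>::value,
"NoAdd has no operator+, so the trait reports failure");

int main() { std::cout << "ok\n"; return 0; }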

is_void

Sometimes we want to know whether a type is void. How? By specialization. First, how client code uses is_void:

#include <iostream>
#include <type_traits>
int main(void) {
std::cout << std::is_void<void>::value << std::endl;
std::cout << std::is_void<int>::value << std::endl;
return 0;
}

How is is_void implemented? A naive implementation might look like this:

template<typename T>
struct is_void {
static constexpr bool value = false;
};
template<>
struct is_void<void> {
static constexpr bool value = true;
};

But type_traits already provides the information-rich constants true_type/false_type, so the real implementation builds on them:

template<typename>
struct remove_cv;
template<typename>
struct __is_void_helper
: public false_type { };
template<>
struct __is_void_helper<void>
: public true_type { };
/// is_void
template<typename _Tp>
struct is_void
: public __is_void_helper<typename remove_cv<_Tp>::type>::type
{ };

The real implementation is more careful still: it strips const/volatile first, so const void and volatile void are recognized as well.

Written as an imaginary function, the logic is:

bool_type __is_void_helper(type) {
if (type == void) {
return true_type;
} else {
return false_type;
}
}
true_type/false_type is_void(input_type) {
temp_type = remove_cv(input_type);
return __is_void_helper(temp_type);
}

About the use of inheritance

To be clear, a simple implementation does not need inheritance at all, for example:

template<typename T>
struct is_void {
static constexpr bool value = false;
};
template<>
struct is_void<void> {
static constexpr bool value = true;
};

But if every such trait (is_int, is_float, ...) had to repeat the same value definition it would get tedious. The simple fix is C++ inheritance:

template<typename T>
struct is_void : public false_type {
};
template<>
struct is_void<void> : public true_type {
};

One more wrinkle: what about const void / volatile void? The qualifiers must be stripped with remove_cv first, which is why an extra helper layer is inserted:

template<typename T>
struct __is_void_helper : public false_type {
};
template<>
struct __is_void_helper<void> : public true_type {
};
template<typename T>
struct is_void : public __is_void_helper<typename remove_cv<T>::type>::type {
};

Note exactly what is inherited: public __is_void_helper<typename remove_cv<T>::type>::type. It is type, not value_type, that is used here. Recall the definition of std::integral_constant:

template<class T, T v>
struct integral_constant {
static constexpr T value = v;
typedef T value_type;
typedef integral_constant type; // uses the injected class name
constexpr operator value_type() const noexcept { return value; }
constexpr value_type operator()() const noexcept { return value; } // since C++14
};

Note the difference: type is the integral_constant itself, while value_type is the underlying type of the constant.
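
A quick check of that distinction (a sketch; needs <type_traits>):

#include <type_traits>

typedef std::integral_constant<int, 10> ten_t;

// ::type is the wrapper itself, ::value_type is the wrapped type.
static_assert(std::is_same<ten_t::type, ten_t>::value, "type is the integral_constant itself");
static_assert(std::is_same<ten_t::value_type, int>::value, "value_type is the underlying type");
static_assert(ten_t::value == 10, "value is the constant");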

An alternative implementation of is_void

template< class T >
struct is_void : std::is_same<void, typename std::remove_cv<T>::type> {};

Detecting built-in types

template<typename>
struct __is_integral_helper
: public false_type { };
template<>
struct __is_integral_helper<bool>
: public true_type { };
template<>
struct __is_integral_helper<char>
: public true_type { };
template<>
struct __is_integral_helper<signed char>
: public true_type { };
template<>
struct __is_integral_helper<unsigned char>
: public true_type { };
template<>
struct __is_integral_helper<wchar_t>
: public true_type { };
template<>
struct __is_integral_helper<char16_t>
: public true_type { };
template<>
struct __is_integral_helper<char32_t>
: public true_type { };
template<>
struct __is_integral_helper<short>
: public true_type { };
template<>
struct __is_integral_helper<unsigned short>
: public true_type { };
template<>
struct __is_integral_helper<int>
: public true_type { };
template<>
struct __is_integral_helper<unsigned int>
: public true_type { };
template<>
struct __is_integral_helper<long>
: public true_type { };
template<>
struct __is_integral_helper<unsigned long>
: public true_type { };
template<>
struct __is_integral_helper<long long>
: public true_type { };
template<>
struct __is_integral_helper<unsigned long long>
: public true_type { };
template<>
struct __is_integral_helper<__GLIBCXX_TYPE_INT_N_0>
: public true_type { };
template<>
struct __is_integral_helper<unsigned __GLIBCXX_TYPE_INT_N_0>
: public true_type { };
template<>
struct __is_integral_helper<__GLIBCXX_TYPE_INT_N_1>
: public true_type { };
template<>
struct __is_integral_helper<unsigned __GLIBCXX_TYPE_INT_N_1>
: public true_type { };
template<>
struct __is_integral_helper<__GLIBCXX_TYPE_INT_N_2>
: public true_type { };
template<>
struct __is_integral_helper<unsigned __GLIBCXX_TYPE_INT_N_2>
: public true_type { };
template<>
struct __is_integral_helper<__GLIBCXX_TYPE_INT_N_3>
: public true_type { };
template<>
struct __is_integral_helper<unsigned __GLIBCXX_TYPE_INT_N_3>
: public true_type { };
/// is_integral
template<typename _Tp>
struct is_integral
: public __is_integral_helper<typename remove_cv<_Tp>::type>::type
{ };

The implementation follows the same pattern as before:

template<typename T>
struct xxx : public __xxx_helper<typename remove_cv<T>::type>::type {
};

Every other built-in type is handled by a specialization of __is_integral_helper.

is_floating_point works the same way:

template<typename>
struct __is_floating_point_helper
: public false_type { };
template<>
struct __is_floating_point_helper<float>
: public true_type { };
template<>
struct __is_floating_point_helper<double>
: public true_type { };
template<>
struct __is_floating_point_helper<long double>
: public true_type { };
#if !defined(__STRICT_ANSI__) && defined(_GLIBCXX_USE_FLOAT128)
template<>
struct __is_floating_point_helper<__float128>
: public true_type { };
#endif
/// is_floating_point
template<typename _Tp>
struct is_floating_point
: public __is_floating_point_helper<typename remove_cv<_Tp>::type>::type
{ };

The idea is straightforward; a standalone version:

#include <iostream>
template<typename T>
struct __is_floating_helper : public std::false_type {};
template<>
struct __is_floating_helper<float> : public std::true_type {};
template<>
struct __is_floating_helper<double> : public std::true_type {};
template<>
struct __is_floating_helper<long double> : public std::true_type {};
template<typename T>
struct is_floating : public __is_floating_helper<typename std::remove_cv<T>::type>::type
{};
int main(void) {
std::cout << is_floating<float>::value << std::endl;
std::cout << is_floating<double>::value << std::endl;
std::cout << is_floating<long double>::value << std::endl;
return 0;
}

Another interesting implementation can be found on cppreference:

https://zh.cppreference.com/w/cpp/types/is_floating_point

template< class T >
struct is_floating_point
: std::integral_constant<
bool,
std::is_same<float, typename std::remove_cv<T>::type>::value ||
std::is_same<double, typename std::remove_cv<T>::type>::value ||
std::is_same<long double, typename std::remove_cv<T>::type>::value
> {};

is_array

is_array checks whether a type is an array type:

template<typename>
struct is_array
: public false_type { };
template<typename _Tp, std::size_t _Size>
struct is_array<_Tp[_Size]>
: public true_type { };
template<typename _Tp>
struct is_array<_Tp[]>
: public true_type { };

The method is simple: partial specializations cover both T[N] and T[] (arrays of unknown bound).
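
A quick check (a sketch; needs <type_traits>):

#include <type_traits>

static_assert(std::is_array<int[3]>::value, "bounded array");
static_assert(std::is_array<int[]>::value, "array of unknown bound");
static_assert(!std::is_array<int*>::value, "a pointer is not an array");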

is_pointer

template<typename>
struct __is_pointer_helper
: public false_type { };
template<typename _Tp>
struct __is_pointer_helper<_Tp*>
: public true_type { };
/// is_pointer
template<typename _Tp>
struct is_pointer
: public __is_pointer_helper<typename remove_cv<_Tp>::type>::type
{ };

is_pointer is again built on partial specialization. A lazier version might skip the cv handling:

#include <iostream>
#include <type_traits>
template<typename T>
struct is_pointer : public std::false_type {};
template<typename T>
struct is_pointer<T*> : public std::true_type {};
int main(void) {
std::cout << is_pointer<const int*>::value << std::endl;
std::cout << is_pointer<int const*>::value << std::endl;
return 0;
}

This appears to work without handling const/volatile, because const int* and int const* are both just pointers to const int and match T*. But for a pointer that is itself const-qualified, e.g. int* const, the lazy version fails; that is why the real implementation strips cv-qualifiers first:

template< class T > struct is_pointer_helper : std::false_type {};
template< class T > struct is_pointer_helper<T*> : std::true_type {};
template< class T > struct is_pointer : is_pointer_helper<typename std::remove_cv<T>::type> {};
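
A sketch showing why remove_cv matters here (naive_is_pointer is an illustrative name, not a library trait):

#include <type_traits>

// Naive version: no cv stripping on the pointer itself.
template<typename T> struct naive_is_pointer : std::false_type {};
template<typename T> struct naive_is_pointer<T*> : std::true_type {};

static_assert(naive_is_pointer<const int*>::value, "pointer to const still matches T*");
static_assert(!naive_is_pointer<int* const>::value, "a const pointer does NOT match T*");
static_assert(std::is_pointer<int* const>::value, "std::is_pointer strips cv first, so it says true");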

lvalue references and rvalue references

/// is_lvalue_reference
template<typename>
struct is_lvalue_reference
: public false_type { };
template<typename _Tp>
struct is_lvalue_reference<_Tp&>
: public true_type { };
/// is_rvalue_reference
template<typename>
struct is_rvalue_reference
: public false_type { };
template<typename _Tp>
struct is_rvalue_reference<_Tp&&>
: public true_type { };

Here remove_cv is not used at all, and that is fine: a reference type can never carry top-level cv-qualifiers, so there is nothing to strip. Note that this reasoning does not carry over to is_pointer: a pointer itself can be const (e.g. int* const), which is exactly why is_pointer does go through remove_cv.

is_member_object_pointer

is_member_object_pointer checks whether T is a pointer to a non-static data member. If T is such a pointer-to-member-object type, the member constant value is true; otherwise it is false.

template<class T>
struct is_member_object_pointer : std::integral_constant<
bool,
std::is_member_pointer<T>::value &&
!std::is_member_function_pointer<T>::value
> {};

is_member_pointer, by contrast, checks whether a type is a pointer to any class member, data member or member function alike.

is_member_function_pointer is specifically about pointers to member functions.

The libstdc++ implementation is a bit more involved:

template<typename>
struct is_function;
template<typename>
struct __is_member_object_pointer_helper
: public false_type { };
// the partial specialization matches any pointer-to-member;
// member function pointers are filtered out via !is_function
template<typename _Tp, typename _Cp>
struct __is_member_object_pointer_helper<_Tp _Cp::*>
: public integral_constant<bool, !is_function<_Tp>::value> { };
/// is_member_object_pointer
template<typename _Tp>
struct is_member_object_pointer
: public __is_member_object_pointer_helper<
typename remove_cv<_Tp>::type>::type
{ };
// remove_cv is in fact needed: the member pointer itself may be cv-qualified (e.g. int X::* const)

Pointers to member functions

template<typename>
struct __is_member_function_pointer_helper
: public false_type { };
template<typename _Tp, typename _Cp>
struct __is_member_function_pointer_helper<_Tp _Cp::*>
: public integral_constant<bool, is_function<_Tp>::value> { };
/// is_member_function_pointer
template<typename _Tp>
struct is_member_function_pointer
: public __is_member_function_pointer_helper<
typename remove_cv<_Tp>::type>::type
{ };

How are these member-pointer traits used? Before that, a less common piece of C++ syntax needs an introduction.

To ask whether something is a member object pointer, the type is spelled like int(cls::*):

#include <iostream>
#include <type_traits>
int main() {
class cls {};
std::cout << (std::is_member_object_pointer<int(cls::*)>::value
? "T is member object pointer"
: "T is not a member object pointer") << '\n';
std::cout << (std::is_member_object_pointer<int(cls::*)()>::value
? "T is member object pointer"
: "T is not a member object pointer") << '\n';
}

Declaring pointer-to-member types

Normally class members are used like this:

class X {
public:
int a;
int b;
};
int main(void) {
X x;
x.a; // use a
x.b; // use b
}

Sometimes, though, you want a separate variable, say obj_ref, that designates either a or b (a pointer to member). It is then applied to an object x:

x.*obj_ref = 10;

and that assigns to whichever of a/b obj_ref designates.

#include <iostream>
using namespace std;
class X {
public:
int a;
void f(int b) {
cout << "The value of b is "<< b << endl;
}
};
int main() {
// declare pointer to data member
int X::*ptiptr = &X::a;
// declare a pointer to member function
void (X::* ptfptr) (int) = &X::f;
// create an object of class type X
X xobject;
// initialize data member
xobject.*ptiptr = 10;
cout << "The value of a is " << xobject.*ptiptr << endl;
// call member function
(xobject.*ptfptr) (20);
}

Summary

  • type X::* names a pointer to a data member.
  • type (X::*)(args) names a pointer to a member function.

typedef int X::*my_pointer_to_member;
typedef void (X::*my_pointer_to_function) (int);
int main() {
my_pointer_to_member ptiptr = &X::a;
my_pointer_to_function ptfptr = &X::f;
X xobject;
xobject.*ptiptr = 10;
cout << "The value of a is " << xobject.*ptiptr << endl;
(xobject.*ptfptr) (20);
}

Detecting member pointers

There are two traits for members:

- is_member_object_pointer
- is_member_function_pointer

They simply report which of the two categories a given type falls into.

#include <iostream>
#include <type_traits>
class X {
int a;
int f(int a, int b) { return 0; }
};
int main() {
std::cout << std::boolalpha;
std::cout << std::is_member_object_pointer<int(X::*)>::value << std::endl;
// or
typedef int X::* x_class_object_pointer_t;
std::cout << std::is_member_object_pointer<x_class_object_pointer_t>::value << std::endl;
return 0;
}

Just note that the helper's partial specialization needs two template parameters: the member's type and the enclosing class:

template<typename>
struct is_function;
template<typename>
struct __is_member_object_pointer_helper
: public false_type { };
template<typename _Tp, typename _Cp>
struct __is_member_object_pointer_helper<_Tp _Cp::*>
: public integral_constant<bool, !is_function<_Tp>::value> { };
/// is_member_object_pointer
template<typename _Tp>
struct is_member_object_pointer
: public __is_member_object_pointer_helper<
typename remove_cv<_Tp>::type>::type
{ };

is_member_function_pointer then follows naturally:

template<typename>
struct __is_member_function_pointer_helper
: public false_type { };
template<typename _Tp, typename _Cp>
struct __is_member_function_pointer_helper<_Tp _Cp::*>
: public integral_constant<bool, is_function<_Tp>::value> { };
/// is_member_function_pointer
template<typename _Tp>
struct is_member_function_pointer
: public __is_member_function_pointer_helper<
typename remove_cv<_Tp>::type>::type
{ };

Summary
Both data-member pointers and member-function pointers are written in the form Type Class::*. If Type is an object type (int/float/double or a user-defined type), it is a pointer to a data member; if Type is a function type, it is a pointer to a member function. A small check follows.
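
A quick check of the two traits (a sketch; needs <type_traits>):

#include <type_traits>

struct X { int a; void f(int) {} };

static_assert(std::is_member_object_pointer<int X::*>::value, "int X::* points to a data member");
static_assert(!std::is_member_object_pointer<void (X::*)(int)>::value, "not a data-member pointer");
static_assert(std::is_member_function_pointer<void (X::*)(int)>::value, "a member-function pointer");
static_assert(std::is_member_pointer<int X::*>::value &&
std::is_member_pointer<void (X::*)(int)>::value, "is_member_pointer covers both");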

Compiler intrinsics

The next three traits need compiler support: __is_enum, __is_union and __is_class are all compiler built-ins.

/// is_enum
template<typename _Tp>
struct is_enum
: public integral_constant<bool, __is_enum(_Tp)>
{ };
/// is_union
template<typename _Tp>
struct is_union
: public integral_constant<bool, __is_union(_Tp)>
{ };
/// is_class
template<typename _Tp>
struct is_class
: public integral_constant<bool, __is_class(_Tp)>
{ };

is_function, on the other hand, is implemented in libstdc++ as a long list of partial specializations covering every form a function type can take (cv-qualifiers, ref-qualifiers, variadics, ...); it is omitted here. A much shorter alternative is sketched below.
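
A compact alternative shown on cppreference (not the libstdc++ implementation): only function types and reference types ignore a const qualifier applied to them, so excluding references leaves exactly the function types.

#include <type_traits>

// If "const T" is not actually const, T is either a function type or a reference type;
// ruling out references leaves the function types.
template<class T>
struct my_is_function : std::integral_constant<
bool,
!std::is_const<const T>::value && !std::is_reference<T>::value
> {};

static_assert(my_is_function<int(int)>::value, "a function type");
static_assert(!my_is_function<int(*)(int)>::value, "a pointer to function is not a function type");
static_assert(!my_is_function<int&>::value, "a reference is not a function type");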

Is it nullptr?

template<typename>
struct __is_null_pointer_helper
: public false_type { };
template<>
struct __is_null_pointer_helper<std::nullptr_t>
: public true_type { };
/// is_null_pointer (LWG 2247).
template<typename _Tp>
struct is_null_pointer
: public __is_null_pointer_helper<typename remove_cv<_Tp>::type>::type
{ };
/// __is_nullptr_t (extension).
template<typename _Tp>
struct __is_nullptr_t
: public is_null_pointer<_Tp>
{ };

The handling is trivial: an explicit specialization for std::nullptr_t, again after remove_cv.

const & volatile

/// is_const
template<typename>
struct is_const
: public false_type { };
template<typename _Tp>
struct is_const<_Tp const>
: public true_type { };
/// is_volatile
template<typename>
struct is_volatile
: public false_type { };
template<typename _Tp>
struct is_volatile<_Tp volatile>
: public true_type { };

Note that in C++, int const and const int denote the same type, so the _Tp const specialization covers both spellings. A small check follows.
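
A quick check of where the const actually sits (a sketch; needs <type_traits>):

#include <type_traits>

static_assert(std::is_const<const int>::value, "const int is const");
static_assert(std::is_const<int const>::value, "int const is the same type");
static_assert(!std::is_const<const int*>::value, "a pointer to const is itself not const");
static_assert(std::is_const<int* const>::value, "a const pointer is const");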

Trivial types and standard layout

/// is_trivial
template<typename _Tp>
struct is_trivial
: public integral_constant<bool, __is_trivial(_Tp)>
{ };
// is_trivially_copyable
template<typename _Tp>
struct is_trivially_copyable
: public integral_constant<bool, __is_trivially_copyable(_Tp)>
{ };
/// is_standard_layout
template<typename _Tp>
struct is_standard_layout
: public integral_constant<bool, __is_standard_layout(_Tp)>
{ };
/// is_pod
// Could use is_standard_layout && is_trivial instead of the builtin.
template<typename _Tp>
struct is_pod
: public integral_constant<bool, __is_pod(_Tp)>
{ };

All of these are answered through compiler built-ins.
In addition there are (see the sketch after this list):

  • is_literal_type
  • is_empty: the class has no non-static data members (nothing in it).
  • is_polymorphic: the class declares or inherits a virtual function.
  • is_final: the class is declared final and cannot be derived from.
  • is_abstract: the class is abstract (has a pure virtual function).
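
A quick illustration of a few of these (a sketch; is_final needs C++14; <type_traits>):

#include <type_traits>

struct Empty {};
struct Poly { virtual ~Poly() {} };
struct Sealed final {};
struct Abstract { virtual void f() = 0; };

static_assert(std::is_empty<Empty>::value, "no non-static data members, no virtuals");
static_assert(std::is_polymorphic<Poly>::value, "has a virtual function");
static_assert(std::is_final<Sealed>::value, "declared final");
static_assert(std::is_abstract<Abstract>::value, "has a pure virtual function");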

Detecting signedness

template<typename _Tp,
bool = is_arithmetic<_Tp>::value>
struct __is_signed_helper
: public false_type { };
// use the conversion of -1: for signed arithmetic types, T(-1) < T(0)
template<typename _Tp>
struct __is_signed_helper<_Tp, true>
: public integral_constant<bool, _Tp(-1) < _Tp(0)>
{ };
/// is_signed
template<typename _Tp>
struct is_signed
: public __is_signed_helper<_Tp>::type
{ };
/// is_unsigned
// unsigned = arithmetic and not signed, reusing the __and_/__not_ helpers above
template<typename _Tp>
struct is_unsigned
: public __and_<is_arithmetic<_Tp>, __not_<is_signed<_Tp>>>
{ };
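
A few checks worth noting (a sketch; <type_traits>): floating-point types count as signed, and non-arithmetic types are neither signed nor unsigned.

#include <type_traits>

static_assert(std::is_signed<int>::value, "int is signed");
static_assert(!std::is_signed<unsigned int>::value, "unsigned int is not");
static_assert(std::is_signed<float>::value, "float(-1) < float(0), so float counts as signed");
static_assert(!std::is_signed<int*>::value && !std::is_unsigned<int*>::value,
"a pointer is not arithmetic, so it is neither");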

1.9 Loop unrolling

Unrolling bubble sort

void bubbleSort(int* data, int n) {
if( n <= 1)
return;
for(int j = 0; j < n-1; ++j)
if(data[j] > data[j+1])
std::swap(data[j], data[j+1]);
bubbleSort(data, n-1);
}
#include <iostream>
template<int j>
void check_swap(int *data) {
if (data[j] > data[j+1])
std::swap(data[j], data[j+1]);
}
// j must never reach n: the next recursive check_swap
// would then access index j+1 past the end
template<int n, int j>
void bloop(int *data) {
check_swap<j>(data);
// the naive recursion would be
// bloop<n, j+1>(data);
// but that expands without bound at compile time,
// so once j+1 >= n both template arguments are
// forced to -1, which selects the terminating
// specialization below
bloop< (j+1 >= n? -1 : n), (j+1 >= n? -1 : j+1)>(data);
}
template<>
void bloop<-1,-1>(int *data) {
}
template<int N>
void bsort(int *data) {
// equivalent to: for (j = 0; j < N-1; ++j) check_swap(j, j+1);
// i.e. one bubble pass over [0, N-1)
bloop<N-1, 0>(data);
bsort<N-1>(data);
}
template<>
void bsort<1>(int *data) {
}
template<>
void bsort<0>(int *data) {
}
int main(void) {
int data[] = {4,3,2,1};
bsort<4>(data);
for (auto x : data) {
std::cout << x << std::endl;
}
return 0;
}

Code bloat

To hide the implementation details, the bloop/check_swap functions above can be folded into a class, roughly like this (bodies elided):

template<int n>
class IntBubbleSortC {
template<int j>
static inline void check_swap ....
template<int n, int j>
static inline void bloop ...
public:
static inline void sort ...
};
template<>
class IntBubbleSortC<0> {
public:
static inline void sort(int* data) { }
};
int main() {
int data[4] = {3,4,2,1};
IntBubbleSortC<4>::sort(data); // invoked like this
std::cin.get(); return 0;
}

Note that these member functions must be (implicitly or explicitly) inline; otherwise every instantiation emits its own out-of-line copies and the generated code bloats.

1.9 Control structures with templates

IF

Start from the earlier example: an if/else selection implemented with templates.

#include <iostream>
template<bool value, typename If, typename Then>
struct IF {
typedef If type;
};
template<typename If, typename Then>
struct IF<false, If, Then> {
typedef Then type;
};
struct IfClass {
void print() {
std::cout << "IfClass" << std::endl;
}
};
struct ThenClass {
void print() {
std::cout << "ThenClass" << std::endl;
}
};
int main(void) {
IF<true, IfClass, ThenClass>::type x;
x.print();
IF<false, IfClass, ThenClass>::type y;
y.print();
return 0;
}

Building a fixed-width integer type

const int len = 4;
typedef IF<sizeof(long long)==len, long long, void>::type long_long_or_void_t;
typedef IF<sizeof(long)==len, long, long_long_or_void_t>::type long_or_llong_void_t;
typedef IF<sizeof(int)==len, int, long_or_llong_void_t>::type int_long_llong_void_t;
typedef IF<sizeof(short)==len, short, int_long_llong_void_t>::type short_int_long_llong_void_t;
typedef short_int_long_llong_void_t int32_t;

The goal is to pick whichever built-in integer type is exactly 4 bytes wide.

Written as one nested expression:

const int len = 4;
typedef
IF<sizeof(short)==len, short,
IF<sizeof(int)==len, int,
IF<sizeof(long)==len, long,
IF<sizeof(long long)==len, long long,
void>::type>::type>::type>::type
int32_t; // a type with exactly the requested byte width
std::cout << sizeof(int32_t) << std::endl;

WHILE

A WHILE loop can be expressed with templates as well.

// cond must expose ::value
// statement must expose ::next
template<template<typename> class cond, typename statement>
struct WHILE {
struct STOP {
typedef statement type;
};
typedef typename
IF<cond<statement>::value,
WHILE<cond, typename statement::next>,
STOP
>::type::type type;
};

For example, the program below computes 1^x + 2^x + 3^x + ... + n^x. In the code the exponent x is named e (not Euler's number e).

#include <iostream>
template<bool cond, typename If, typename Then>
struct IF {
typedef If type;
};
template<typename If, typename Then>
struct IF<false, If, Then> {
typedef Then type;
};
// cond must expose ::value
// statement must expose ::next
template<template<typename> class cond, typename statement>
struct WHILE {
struct STOP {
typedef statement type;
};
typedef typename
IF<cond<statement>::value,
WHILE<cond, typename statement::next>,
STOP
>::type::type type;
};
// compute 1^e + 2^e + ... + n^e
template<int n, int e>
struct sum_pow {
// pow computes x^e
template<int x>
struct pow {
// p^q, computed recursively
template<int p, int q>
struct pow_q {
static constexpr int value = p * pow_q<p,q-1>::value;
};
template<int p>
struct pow_q<p, 0> {
static constexpr int value = 1;
};
static constexpr int value = pow_q<x,e>::value;
};
template<typename statement>
struct cond {
static constexpr bool value = statement::iter <= n;
typedef statement type;
};
template<int i, int sum>
struct statement {
typedef statement<i+1, sum+pow<i>::value> next;
static constexpr int iter = i;
static constexpr int value = sum;
};
static constexpr int value = WHILE<cond, statement<1,0>>::type::value;
};
int main(void) {
// 1^2 + 2^2 + ... + 10^2
std::cout << sum_pow<10,2>::value << std::endl;
return 0;
}

An interesting compile error

This error turned up by accident from a wrongly written program: computing the greatest common divisor with the WHILE loop.

#include <iostream>
template<bool cond, typename If, typename Then>
struct IF {
typedef If type;
};
template<typename If, typename Then>
struct IF<false, If, Then> {
typedef Then type;
};
// cond has type/value
// statement has next
template<template <typename> class cond, typename statement>
struct WHILE {
struct STOP {
typedef statement type;
};
typedef typename IF<
cond<statement>::value,
WHILE<cond, typename statement::next>,
STOP
>::type::type type;
// note the two ::type layers:
// the first is IF<>::type, which e.g. selects STOP when the loop ends;
// the second unwraps STOP and yields the final statement type, STOP::type
};
// GCD with the WHILE loop
/*
* a, b
* while (b) {
* temp = a % b;
* a = b;
* b = temp;
* }
*/
template<int a, int b>
struct GCD {
template<typename statement>
struct cond {
static constexpr bool value = statement::i != 0;
};
template<int x, int y>
struct statement {
typedef statement<y, x%y> next;
static constexpr int i = x%y;
static constexpr int value = y;
// note: writing i = y; value = x % y here does not compile.
// The whole point is to avoid ever forming x % 0.
// With i = y and value = x % y that cannot be avoided,
// and the error message is rather cryptic:
// non-type template argument is not a constant expression
// division by zero -> this is the real cause
// E.g. starting from statement<10, 5>:
// -> typedef statement<5, 0> next;
// -> i = 5,
// -> value = 0;
// WHILE sees i = 5 and keeps expanding statement<5, 0>
// -> typedef statement<0, 5%0> next; <-- and this is where it blows up
};
static const int value = WHILE<cond, statement<a,b>>::type::value;
};
int main(void) {
std::cout << GCD<10, 5>::value << std::endl;
return 0;
}

Pay particular attention to the comment in the program: writing i = y; value = x % y inside statement fails to compile, because the expansion can then reach x % 0. The error message is cryptic ("non-type template argument is not a constant expression ... division by zero", where the division by zero is the real cause). Starting from statement<10, 5>, next is statement<5, 0>; WHILE still sees i = 5 and expands statement<5, 0>, whose next would be statement<0, 5%0>, and that is the division by zero.

1.8 Template metaprogramming

Template metaprogramming

  • compile-time numeric computation: e.g. factorials;
  • type computation: if/else, is_same_type;
  • code computation.

Note that all of this happens at compile time.

Programming style

As a programming paradigm, C++ template metaprogramming is functional programming: evaluation has no side effects (there is no mutable storage), and loops are expressed as recursion. Template specialization provides conditional branching, and recursive template nesting provides iteration; together these give templates the same general power as an ordinary language (Turing completeness).

In short:

  • no access to mutable storage;
  • recursion, terminated by (partial) specializations;
  • if/else implemented through (partial) specialization.

Viewed as functions, the template arguments inside <> are the call's inputs, while typedef, static const and enum members are the return values (types and numbers; numeric results are limited to integral types, though floating point can be encoded if really needed). Code computation works by computing types and then selecting, through those types, which functions actually run (C++ is statically typed, and the compiler manipulates types very well).

To spell this out:

  • template<...input types...> is the function's input;
  • typedef ... or static constexpr <integral value> ... is the function's return value;
  • code computation means dispatching to different functions depending on the computed type.

A very small example illustrates all three points.

#include <iostream>
template<typename T, int i=1>
class someComputing {
public:
typedef volatile T* retType; // type computation
enum { retValue = i + someComputing<T, i-1>::retValue }; // numeric computation, via recursion
static void f() { std::cout << "someComputing: i=" << i << '\n'; }
};
template<typename T> // specialization: the recursion terminator
class someComputing<T, 0> {
public:
enum { retValue = 0 };
};
template<typename T>
class codeComputing {
public:
static void f() { T::f(); } // code computation: which function runs depends on the type
};
int main(){
someComputing<int>::retType a=0;
std::cout << sizeof(a) << '\n'; // size of a pointer (8 on a 64-bit build)
// default maximum template recursion depth: 500 in VS2013, 900 in GCC 4.8 (-ftemplate-depth=n)
std::cout << someComputing<int, 500>::retValue << '\n'; // 1+2+...+500
codeComputing<someComputing<int, 99>>::f();
std::cin.get(); return 0;
}

How template metaprograms are organized

Numeric computation

A summation template was shown earlier; here is a compile-time primality test.

#include <iostream>
template<int N>
struct is_prime {
template<int p, int i>
struct check_prime {
static constexpr bool value = (p%i) && check_prime<p, i-1>::value;
};
template<int p>
struct check_prime<p, 2> {
static constexpr bool value = p == 2 || (p > 2 && (p&0x01));
};
template<int p>
struct check_prime<p, 1> {
static constexpr bool value = true;
};
static constexpr bool value = N == 2 || check_prime<N, N-1>::value;
};
int main(void) {
std::cout << is_prime<5>::value << std::endl;
std::cout << is_prime<7>::value << std::endl;
std::cout << is_prime<2>::value << std::endl;
return 0;
}

Going one step further, a class that prints every prime <= N can be written as follows:

template<int N>
struct print_prime {
static constexpr bool value = (N < 2) ? false : is_prime<N>::value;
static void print() {
if (value) {
std::cout << N << ":" << "true" << std::endl;
}
print_prime<N-1>::print();
}
};
template<>
struct print_prime<2> {
static constexpr bool value = true;
static void print() {
std::cout << 2 << ":" << "true" << std::endl;
}
};
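
A usage sketch (assuming the is_prime and print_prime definitions above are in scope):

int main(void) {
print_prime<20>::print(); // prints 19, 17, 13, 11, 7, 5, 3, 2 (in descending order), each marked "true"
return 0;
}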

1.7 C++11 extensions for templates

  • >> is now parsed correctly from context (no space needed between closing angle brackets);
  • default arguments for function template parameters;
  • variadic template parameters (plus sizeof...() to query the number of arguments);
  • template aliases (the extended using keyword);
  • extern template for sharing explicit instantiations across translation units; export template is abandoned.

1.6 How templates are compiled

Templates are normally compiled with the inclusion model (the more obscure alternative, export, has been abandoned, so it is not worth studying).

The correct approach is:

  • put the template definitions in the .h header file;
  • include that header wherever the template is used.

At link time, duplicate class or function definitions produced by instantiating the same template in several translation units are deduplicated automatically.

Nested dependent names

Suppose we want to use the iterator type defined inside T. If the code is written as:

template<typename T>
void print2nd(const T& container)
{
T::const_iterator * x;
...
}

the compiler cannot tell whether const_iterator names a type or a static data member, and in most contexts it assumes the latter. Since it really is a type here, compilation fails.

The correct spelling is:

template<typename T>
void print2nd(const T& container)
{
typename T::const_iterator *x;
...
}

That is, a nested dependent name that denotes a type must be announced explicitly with typename.

Exceptions

There are exactly two places where typename must not be written before a nested dependent name: in a base-class list and in a member-initializer list.

template<typename T>
class Derived: public Base<T>::Nested // 1. no typename allowed here (base-class list)
{
 public:
  explicit Derived(int x)
: Base<T>::Nested(x) // 2. nor here (member-initializer list)
  {
   typename Base<T>::Nested temp; //nested dependent type, need typename
  }
};

Here Nested is a type defined inside Base<T>. Intuitively this makes sense: one can only inherit from a type, never from a static data member.

Member function templates

template<typename T>
class aTMP{
public:
typedef const T reType;
};
void f() {
std::cout << "global f()" << std::endl;
}
template<typename T>
class Base {
public:
template<int N = 99> // f is itself a member function template
void f() {
std::cout << "member f(): " << N << std::endl;
}
};

The this pointer

this-> is used to force name lookup into the base class:

  • when the base class is an instance of a class template that depends on the template parameters, its instantiation is deferred, so names that do not depend on the template parameters are not looked up in that base (see reference [1], p. 166).
#include <iostream>
template<typename T>
class aTMP{
public: typedef const T reType;
};
void f() { std::cout << "global f()\n"; }
template<typename T>
class Base {
public:
template <int N = 99>
void f() { std::cout << "member f(): " << N << '\n'; }
};
template<typename T>
class Derived : public Base<T> {
public:
typename T::reType m; // typename cannot be omitted
Derived(typename T::reType a) : m(a) { }
void df1() { f(); } // calls the global f(), not the base-class f() one might expect
void df2() { this->template f<>(); } // base-class f<99>() (default template argument)
void df3() { Base<T>::template f<22>(); } // explicitly base-class f<22>()
void df4() { ::f(); } // explicitly the global f()
};
int main(){
Derived<aTMP<int>> a(10);
a.df1(); a.df2(); a.df3(); a.df4();
std::cin.get(); return 0;
}