1. 前言
string 是 C++ 标准库中用于操作字符串的类,它设计得比较早(早于 STL 被纳入标准库),因此某些接口设计得有些冗余
对于String类,不仅仅是学会使用它,更重要的是要从底层去理解它;本篇文章将从底层出发,模拟实现常用的String类接口(实现方式与不同平台下的标准库中的实现不一定相同)
2. 接口
2.1 成员函数
// Constructor: builds the string from a C string (the empty string by default)
string(const char* str = "");
// Copy constructor
string(const string& s);
// Copy assignment operator
string& operator=(const string& s);
// Destructor
~string();
2.2 迭代器
// Iterators are plain character pointers
typedef char* iterator;
typedef const char* const_iterator;
// Read-write iteration
iterator begin();
iterator end();
// Read-only iteration
const_iterator begin() const;
const_iterator end() const;
2.3 容量
// Number of characters
size_t size() const;
// Current capacity
size_t capacity() const;
// n < size: truncate to n characters; capacity is not changed
// n >= size: grow the storage first if n > capacity, then fill the new positions with ch
void resize(size_t n, char ch = '\0');
// n <= capacity: capacity is not changed
// n > capacity: grow the storage
void reserve(size_t n);
2.4 修改
// Append one character at the end
void push_back(char ch);
// Append one character / a C string
string& operator+=(char ch);
string& operator+=(const char* str);
// Append a C string
void append(const char* str);
// Empty the string (capacity is not changed)
void clear();
// Insert one character / a C string at position pos
void insert(size_t pos, char ch);
void insert(size_t pos, const char* str);
// Erase len characters starting at position pos
void erase(size_t pos, size_t len = npos);
// Swap contents with s
void swap(string& s);
2.5 元素访问
// Access the character at index pos (read-write overload, then read-only overload)
char& operator[](size_t pos);
const char& operator[](size_t pos) const;
2.6 字符串操作
// Get the underlying C string
const char* c_str() const;
// Search for a character / a C string, starting from position pos
size_t find(char ch, size_t pos = 0) const;
size_t find(const char* str, size_t pos = 0) const;
// Get a substring
string substr(size_t pos = 0, size_t len = npos) const;
2.7 非成员函数重载
// Comparisons
bool operator==(const string& s1, const string& s2);
bool operator!=(const string& s1, const string& s2);
bool operator>(const string& s1, const string& s2);
bool operator>=(const string& s1, const string& s2);
bool operator<(const string& s1, const string& s2);
bool operator<=(const string& s1, const string& s2);
// Stream insertion / extraction operators
ostream& operator<<(ostream& out, const string& s);
istream& operator>>(istream& in, string& s);
// Swap two strings
void swap(string& s1, string& s2);
// Read one line
void getline(istream& in, string& s);
3. 模拟实现
namespace byh
{
    // Declaration-only outline of the simulated string class: every member and
    // non-member function is declared here without a body.
    class string
    {
    public:
        // Sentinel used as the default len of erase/substr and as the return
        // type's "no position" value. Public so callers can compare the result
        // of find() against it.
        static const size_t npos;

        // constructor
        string(const char* str = "");
        // copy constructor
        string(const string& s);
        // assignment operator overloading
        string& operator=(const string& s);
        // destructor
        ~string();

        // iterator
        typedef char* iterator;
        typedef const char* const_iterator;
        iterator begin();
        iterator end();
        const_iterator begin() const;
        const_iterator end() const;

        // capacity
        size_t size() const;
        size_t capacity() const;
        void resize(size_t n, char ch = '\0');
        void reserve(size_t n);

        // modify
        void push_back(char ch);
        string& operator+=(char ch);
        string& operator+=(const char* str);
        void append(const char* str);
        void clear();
        void insert(size_t pos, char ch);
        void insert(size_t pos, const char* str);
        void erase(size_t pos, size_t len = npos);
        void swap(string& s);

        // access
        char& operator[](size_t pos);
        const char& operator[](size_t pos) const;

        // string operations
        const char* c_str() const;
        size_t find(char ch, size_t pos = 0) const;
        size_t find(const char* str, size_t pos = 0) const;
        string substr(size_t pos = 0, size_t len = npos) const;

    private:
        char* _str;
        size_t _size;
        size_t _capacity;
    };

    // -1 converted to size_t is the largest representable size_t value.
    const size_t string::npos = static_cast<size_t>(-1);

    // Non-member function overload
    bool operator==(const string& s1, const string& s2);
    bool operator!=(const string& s1, const string& s2);
    bool operator>(const string& s1, const string& s2);
    bool operator>=(const string& s1, const string& s2);
    bool operator<(const string& s1, const string& s2);
    bool operator<=(const string& s1, const string& s2);
    // Stream types are qualified so the outline does not depend on a
    // using-directive being in effect.
    std::ostream& operator<<(std::ostream& out, const string& s);
    std::istream& operator>>(std::istream& in, string& s);
    void swap(string& s1, string& s2);
    void getline(std::istream& in, string& s);
}
#include<assert.h>
#include<cstring>
#include<iostream>
using namespace std;

namespace byh
{
    // A simplified re-implementation of a std::string-like class.
    //
    // Storage is a single heap buffer of _capacity + 1 chars. Every member keeps
    // the invariant _str[_size] == '\0', so c_str() is always a valid C string.
    class string
    {
    public:
        // Sentinel meaning "not found" (find) or "up to the end" (erase/substr).
        static const size_t npos;

        // Builds the string from the NUL-terminated C string str.
        string(const char* str = "")
            : _size(strlen(str))
        {
            _capacity = _size;
            _str = new char[_capacity + 1];
            memcpy(_str, str, _size + 1); // +1 copies the terminator too
        }

        // Copy constructor: build a temporary from s's characters and take over
        // its buffer. The temporary is left with the nullptr/0/0 defaults, which
        // its destructor handles.
        string(const string& s)
        {
            string temp(s.c_str());
            swap(temp);
        }

        // Assignment (copy-and-swap): s is already a copy of the right-hand
        // side; swapping hands our old buffer to s, which frees it on return.
        string& operator=(string s)
        {
            swap(s);
            return *this;
        }

        // Releases the buffer.
        ~string()
        {
            delete[] _str;
            _str = nullptr;
            _size = _capacity = 0;
        }

        // iterator: plain pointers into the buffer
        typedef char* iterator;
        typedef const char* const_iterator;

        iterator begin() { return _str; }
        iterator end() { return _str + _size; }
        const_iterator begin() const { return _str; }
        const_iterator end() const { return _str + _size; }

        // capacity

        // Number of characters, not counting the terminator.
        size_t size() const { return _size; }

        // Number of characters the buffer can hold without reallocating.
        size_t capacity() const { return _capacity; }

        // Sets the length to n. Shrinking truncates and keeps the capacity;
        // growing reserves room if needed and fills the new positions with ch.
        void resize(size_t n, char ch = '\0')
        {
            if (n > _size)
            {
                reserve(n);
                memset(_str + _size, ch, n - _size);
            }
            _str[n] = '\0';
            _size = n;
        }

        // Grows the buffer so that capacity() >= n; never shrinks.
        void reserve(size_t n)
        {
            if (n > _capacity)
            {
                char* temp = new char[n + 1];
                // Copy by length, not strcpy: resize() can leave '\0'
                // characters inside the string.
                memcpy(temp, _str, _size + 1);
                delete[] _str; // the old buffer must be released
                _str = temp;
                _capacity = n;
            }
        }

        // modify

        // Appends one character.
        void push_back(char ch) { insert(_size, ch); }

        // Appends one character; returns *this.
        string& operator+=(char ch)
        {
            insert(_size, ch);
            return *this;
        }

        // Appends a C string; returns *this.
        string& operator+=(const char* str)
        {
            insert(_size, str);
            return *this;
        }

        // Appends a C string.
        void append(const char* str) { insert(_size, str); }

        // Makes the string empty; the capacity is kept.
        void clear()
        {
            _str[0] = '\0';
            _size = 0;
        }

        // Inserts ch before position pos (pos == size() appends).
        void insert(size_t pos, char ch)
        {
            assert(pos <= _size);
            if (_size == _capacity)
                reserve(_capacity == 0 ? 4 : 2 * _capacity);
            // Shift the tail, including the terminator, one slot to the right.
            memmove(_str + pos + 1, _str + pos, _size - pos + 1);
            _str[pos] = ch;
            _size += 1;
        }

        // Inserts the C string str before position pos (pos == size() appends).
        // str may point into this string when appending (s += s.c_str());
        // inserting a piece of the string into its own middle without a
        // reallocation is not supported.
        void insert(size_t pos, const char* str)
        {
            assert(pos <= _size);
            const size_t len = strlen(str);
            if (len == 0)
                return;

            if (_size + len > _capacity)
            {
                // Assemble the result in a fresh buffer before freeing the old
                // one, so str stays readable even if it points into _str.
                const size_t grown = _size + len;
                char* fresh = new char[grown + 1];
                memcpy(fresh, _str, pos);
                memcpy(fresh + pos, str, len);
                memcpy(fresh + pos + len, _str + pos, _size - pos + 1);
                delete[] _str;
                _str = fresh;
                _capacity = grown;
            }
            else
            {
                // Shift the tail, including the terminator, len slots right.
                memmove(_str + pos + len, _str + pos, _size - pos + 1);
                memcpy(_str + pos, str, len);
            }
            _size += len;
        }

        // Removes up to len characters starting at pos; the default removes
        // everything from pos to the end. pos == size() is a no-op.
        void erase(size_t pos, size_t len = npos)
        {
            assert(pos <= _size);
            if (len >= _size - pos) // also covers len == npos
            {
                _str[pos] = '\0';
                _size = pos;
            }
            else
            {
                // Source and destination overlap, so memmove rather than strcpy.
                memmove(_str + pos, _str + pos + len, _size - pos - len + 1);
                _size -= len;
            }
        }

        // Exchanges buffers and bookkeeping with s; no characters are copied.
        void swap(string& s)
        {
            std::swap(_str, s._str);
            std::swap(_size, s._size);
            std::swap(_capacity, s._capacity);
        }

        // access

        // Character at index pos; pos == size() refers to the terminator.
        char& operator[](size_t pos)
        {
            assert(pos <= _size);
            return _str[pos];
        }

        const char& operator[](size_t pos) const
        {
            assert(pos <= _size);
            return _str[pos];
        }

        // string operations

        // The NUL-terminated character buffer.
        const char* c_str() const { return _str; }

        // Index of the first ch at or after pos, or npos if there is none
        // (including when pos is past the end).
        size_t find(char ch, size_t pos = 0) const
        {
            for (size_t i = pos; i < _size; i++)
            {
                if (_str[i] == ch)
                    return i;
            }
            return npos;
        }

        // Index of the first occurrence of str at or after pos, or npos.
        size_t find(const char* str, size_t pos = 0) const
        {
            if (pos > _size)
                return npos;
            const char* hit = strstr(_str + pos, str);
            return hit ? static_cast<size_t>(hit - _str) : npos;
        }

        // Copy of up to len characters starting at pos; the default copies up
        // to the end. pos == size() yields an empty string.
        string substr(size_t pos = 0, size_t len = npos) const
        {
            assert(pos <= _size);
            const size_t available = _size - pos;
            const size_t count = len < available ? len : available;
            string temp;
            temp.reserve(count); // one allocation instead of repeated growth
            for (size_t i = 0; i < count; i++)
            {
                temp += _str[pos + i];
            }
            return temp;
        }

    private:
        char* _str = nullptr;
        size_t _size = 0;
        size_t _capacity = 0;
    };

    // -1 converted to size_t is the largest representable size_t value.
    const size_t string::npos = static_cast<size_t>(-1);

    // Non-member function overload
    // Comparisons are lexicographic, via strcmp on the C strings.
    bool operator==(const string& s1, const string& s2)
    {
        return strcmp(s1.c_str(), s2.c_str()) == 0;
    }

    bool operator!=(const string& s1, const string& s2)
    {
        return !(s1 == s2);
    }

    bool operator>(const string& s1, const string& s2)
    {
        return strcmp(s1.c_str(), s2.c_str()) > 0;
    }

    bool operator>=(const string& s1, const string& s2)
    {
        return (s1 > s2 || s1 == s2);
    }

    bool operator<(const string& s1, const string& s2)
    {
        return strcmp(s1.c_str(), s2.c_str()) < 0;
    }

    bool operator<=(const string& s1, const string& s2)
    {
        return !(s1 > s2);
    }

    // Writes the C string of s to out.
    ostream& operator<<(ostream& out, const string& s)
    {
        out << s.c_str();
        return out;
    }

    // Reads one token into s, replacing its contents. Leading ' ' and '\n'
    // are skipped; reading stops at the next ' ' or '\n' (which is consumed)
    // or at end of input. Characters are gathered in a 128-byte stack buffer
    // and appended in chunks so s does not grow one character at a time.
    istream& operator>>(istream& in, string& s)
    {
        s.clear();
        const int eof = istream::traits_type::eof();
        char temp[128];
        size_t i = 0;

        int ch = in.get();
        while (ch == ' ' || ch == '\n')
            ch = in.get();

        // Checking for eof keeps the loop from spinning forever at end of input.
        while (ch != eof && ch != ' ' && ch != '\n')
        {
            temp[i++] = static_cast<char>(ch);
            if (127 == i)
            {
                temp[i] = '\0';
                s += temp;
                i = 0;
            }
            ch = in.get();
        }
        if (i > 0)
        {
            temp[i] = '\0'; // terminate the partial chunk before appending it
            s += temp;
        }
        return in;
    }

    // Exchanges the contents of s1 and s2.
    void swap(string& s1, string& s2)
    {
        s1.swap(s2);
    }

    // Reads characters into s, replacing its contents, up to the next '\n'
    // (which is consumed) or end of input.
    void getline(istream& in, string& s)
    {
        s.clear();
        const int eof = istream::traits_type::eof();
        char temp[128];
        size_t i = 0;

        int ch = in.get();
        while (ch != eof && ch != '\n')
        {
            temp[i++] = static_cast<char>(ch);
            if (127 == i)
            {
                temp[i] = '\0';
                s += temp;
                i = 0;
            }
            ch = in.get();
        }
        if (i > 0)
        {
            temp[i] = '\0';
            s += temp;
        }
    }
}
4. string的额外知识
string中的扩容机制在不同平台下是不同的
- VS:第一次约2倍扩容(15→31),之后都是约1.5倍扩容(31→47→70→105)
- g++:2倍扩容
// Appends 100 characters to an empty string one at a time and prints the
// capacity at the start and again every time it changes.
void Test_increase_capacity()
{
    string s;
    size_t last = s.capacity();
    cout << "capacity->" << last << endl;

    int remaining = 100;
    while (remaining-- > 0)
    {
        s.push_back('1');
        const size_t now = s.capacity();
        if (now == last)
            continue;
        last = now;
        cout << "capacity->" << last << endl;
    }
}
// VS
// capacity->15
// capacity->31
// capacity->47
// capacity->70
// capacity->105

// g++
// capacity->0
// capacity->1
// capacity->2
// capacity->4
// capacity->8
// capacity->16
// capacity->32
// capacity->64
// capacity->128
造成扩容机制不同的本质原因是VS和g++中string的结构不同
// Prints the size in bytes of the string object itself, not the length of the text it holds.
// The trailing values are the sizes the article reports for VS and g++.
// NOTE(review): presumably these depend on target architecture and library version — confirm.
cout << sizeof(string) << endl;// VS:28 // g++:8
VS下string的结构
先是有一个联合体,里面定义了一个长度为16的数组和一个指针
当字符串的长度 < 16 时,使用内部的数组存储字符串(数组要留一个位置存放 '\0',所以最多存 15 个字符,这也是上面 VS 下初始 capacity 为 15 的原因)
当字符串长度 >= 16 时,使用指针开辟空间
由于大部分的字符串长度是小于16的,因此直接在栈上开了空间,比去堆上开空间效率高
其次,有一个size_t类型的数据用来表示字符串长度,一个size_t类型的数据用来表示容量
再有一个指针用来干其他事
g++下的string结构
g++下string是用写时拷贝实现的,内部只包含一个指针,该指针指向一块堆空间,里面包含
字符串长度
容量
引用计数
一个指针,用来存放字符串
写时拷贝
当拷贝构造/赋值时,不是开辟新的空间,而是让构造的对象/赋值的对象指向原本的空间,达到节省空间的效果
这样做的问题是:
- 同一块空间会被析构多次
- 修改其中一个对象会影响其他对象
针对问题1的解决方案:当一个对象被析构时,让引用计数count-1,只有当count为0时,再去释放空间
针对问题2的解决方案:当一个对象要被修改时,检查count,如果为1,说明这块空间是该对象独占的,可以任意修改;如果大于1,先拷贝一份新的空间给该对象(新空间的count为1),再让原空间的count-1,之后只修改新空间