题目:LRU cache
Design and implement a data structure for Least Recently Used (LRU) cache. It should support the following operations: get and set.
get(key) - Get the value (will always be positive) of the key if the key exists in the cache, otherwise return -1.
set(key, value) - Set or insert the value if the key is not already present. When the cache reached its capacity, it should invalidate the least recently used item before inserting a new item.
LRU是一种应用在操作系统上的缓存替换策略,和我们常见的FIFO算法一样,都是用于操作系统中内存管理中的页面替换,其全称叫做Least Recently Used(近期最少使用算法),算法主要是根据数据的历史访问记录来进行数据的淘汰,其核心思想是“如果数据最近被访问过,那么将来被访问的几率也更高”。
LRU算法设计
数据结构的选择:因为涉及到数据元素的查找,删除,替换,移动等操作,所以我们选择列表来进行数据的存储,为了考虑时间复杂度,我们分析一下,单链表插入删除操作的时间复杂度为O(n),双链表为O(1),所以,首选肯定是双链表,另外,元素的查找操作,map的查找效率为O(lgn),首选应该是map,但还有一个hashmap,能够达到O(1)的查找效率,我们后面再编程的时候都试一下这几种方法,看看其能不能通过编译,通过了时间又是多少?
为了能够比较形象的了解LRU的执行过程,我们举一个例子,如下:
假定现有一进程的页面访问序列为:
4,7,0,7,1,0,1,2,1,2,6
缓存容量为5,则随着进程的访问,缓存栈中页面号的变化情况如下图所示。在访问页面6时发生了缺页,此时页面4是最近最久未被访问的页,应将它置换出去。
在算法实现时,我们可以把最近最久没有使用的数据放在链表的最后,当缓存空间满时(即发生缺页),直接将最后一个数据淘汰即可,同时,如果一个数据发生命中,或者新来一个数据,我们都将该数据移到链表的头部,这样就能保证在链表头部的数据都是最近访问过的,而链表后面的数据就是最近最久没有访问过的。如下所示:
代码实现,为了验证上面所提出数据结构是否能通过LeetCode的编译,我们都实现一遍,下面是single list+map的实现,时间复杂度为O(n)+O(lgn),开始我还以为通过不了,最后还是通过了,耗时大约900ms。
/************************************************************************/
/* 单链表版本
/************************************************************************/
struct Node {
int m_nKey;
int m_nValue;
Node* m_pNext;
};
class LRUCache {
public:
LRUCache(int capacity) {
m_nSize = capacity;
m_nCount = 0;
m_lruList = NULL;
}
int get(int key) {
if (NULL == m_lruList)
return -1;
map<int, Node *>::iterator it = m_lruMap.find(key);
if (it == m_lruMap.end()) //没有找到
return -1;
else {
Node *p = it->second;
//把节点移到链表的开头
pushFront(p);
}
return m_lruList->m_nValue;
}
void set(int key, int value) {
if (NULL == m_lruList) {
m_lruList = new Node();
m_lruList->m_nKey = key;
m_lruList->m_nValue = value;
m_lruList->m_pNext = NULL;
m_nCount ++;
m_lruMap[key] = m_lruList;
}
else {
map<int, Node *>::iterator it = m_lruMap.find(key);
if (it == m_lruMap.end()){ //没有命中,将链表的最后一个节点删除
if (m_nSize == m_nCount) { //cache已满
Node *pHead = m_lruList;
Node *pTemp = pHead;
while(pHead->m_pNext != NULL) {
pTemp = pHead;
pHead = pHead->m_pNext;
}
m_lruMap.erase(pHead->m_nKey);
m_nCount --;
if (pHead == pTemp) //只有一个节点
pHead = NULL;
else
pTemp->m_pNext = NULL;
}
Node *p = new Node(); //插入新的节点于头部
p->m_nKey = key;
p->m_nValue = value;
p->m_pNext = m_lruList;
m_lruList = p;
m_lruMap[key] = m_lruList;
m_nCount ++;
}
else { //命中,则将该命中的节点移至链表头部
Node *pCur = it->second;
pCur->m_nValue = value;
pushFront(pCur);
}
}
}
void pushFront(Node *pCur) { //把节点移动到链表头部,时间复杂度O(n)
if (NULL == pCur)
return;
if (m_nCount == 1 || pCur == m_lruList)
return;
Node *pHead = m_lruList;
while (pHead->m_pNext != pCur)
pHead = pHead->m_pNext;
pHead->m_pNext = pCur->m_pNext;
pCur->m_pNext = m_lruList;
m_lruList = pCur;
}
void printCache() {
Node *p = m_lruList;
while (p) {
cout << p->m_nKey << ":" << p->m_nValue << " ";
p = p->m_pNext;
}
}
private:
int m_nSize;
int m_nCount;
map<int, Node *> m_lruMap;
Node* m_lruList;
};
下面是double list+map版本,时间复杂度为O(1)+O(lgn),耗时大约300s
1 /************************************************************************/
2 /* 双链表版本
3 /************************************************************************/
4 struct Node {
5 int m_nKey;
6 int m_nValue;
7 Node* m_pNext;
8 Node* m_pPre;
9 };
10
11 class LRUCache {
12 public:
13 LRUCache(int capacity) {
14 m_nSize = capacity;
15 m_nCount = 0;
16 m_lruListHead = NULL;
17 m_lruListTail = NULL;
18 }
19
20 int get(int key) {
21 if (NULL == m_lruListHead)
22 return -1;
23 map<int, Node *>::iterator it = m_lruMap.find(key);
24 if (it == m_lruMap.end()) //没有找到
25 return -1;
26 else {
27 Node *p = it->second;
28 //把节点移到链表的开头
29 pushFront(p);
30 }
31 return m_lruListHead->m_nValue;
32 }
33
34 void set(int key, int value) {
35 if (NULL == m_lruListHead) {
36 m_lruListHead = new Node();
37 m_lruListHead->m_nKey = key;
38 m_lruListHead->m_nValue = value;
39 m_lruListHead->m_pNext = NULL;
40 m_lruListHead->m_pPre = NULL;
41 m_lruListTail = m_lruListHead;
42 m_nCount ++;
43 m_lruMap[key] = m_lruListHead;
44 }
45 else {
46 map<int, Node *>::iterator it = m_lruMap.find(key);
47 if (it == m_lruMap.end()){ //没有命中,将链表的最后一个节点删除
48 if (m_nSize == m_nCount) { //cache已满
49 if (m_lruListHead == m_lruListTail) {//只有一个节点
50 m_lruMap.erase(m_lruListHead->m_nKey);
51 m_lruListHead->m_nKey = key;
52 m_lruListHead->m_nValue = value;
53 m_lruMap[key] = m_lruListHead;
54 }
55 else {
56 Node *p = m_lruListTail;
57 m_lruListTail->m_pPre->m_pNext = NULL;
58 m_lruListTail = m_lruListTail->m_pPre;
59 m_lruMap.erase(p->m_nKey);
60
61 p->m_nKey = key;
62 p->m_nValue = value;
63 p->m_pNext = m_lruListHead;
64 p->m_pPre = NULL;
65 m_lruListHead->m_pPre = p;
66 m_lruListHead = p;
67 m_lruMap[key] = m_lruListHead;
68 }
69 }
70 else {
71 Node *p = new Node(); //插入新的节点于头部
72 p->m_nKey = key;
73 p->m_nValue = value;
74 p->m_pNext = m_lruListHead;
75 p->m_pPre = NULL;
76 m_lruListHead->m_pPre = p;
77 m_lruListHead = p;
78 m_lruMap[key] = m_lruListHead;
79 m_nCount ++;
80 }
81 }
82 else { //命中,则将该命中的节点移至链表头部
83 Node *pCur = it->second;
84 pCur->m_nValue = value;
85 pushFront(pCur);
86 }
87 }
88 }
89
90 void pushFront(Node *pCur) { //把节点移动到链表头部,时间复杂度O(1)
91 if (NULL == pCur)
92 return;
93 if (m_nCount == 1 || pCur == m_lruListHead)
94 return;
95 if (pCur == m_lruListTail) { //假如是尾节点
96 pCur->m_pPre->m_pNext = NULL;
97 pCur->m_pNext = m_lruListHead;
98 m_lruListTail = pCur->m_pPre;
99 m_lruListHead->m_pPre = pCur;
100 m_lruListHead = pCur;
101 }
102 else {
103 pCur->m_pPre->m_pNext = pCur->m_pNext;
104 pCur->m_pNext->m_pPre = pCur->m_pPre;
105
106 pCur->m_pNext = m_lruListHead;
107 m_lruListHead->m_pPre = pCur;
108 m_lruListHead = pCur;
109 }
110 }
111
112 void printCache() {
113 Node *p = m_lruListHead;
114 while (p) {
115 cout << p->m_nKey << ":" << p->m_nValue << " ";
116 p = p->m_pNext;
117 }
118 }
119
120 private:
121 int m_nSize;
122 int m_nCount;
123 map<int, Node *> m_lruMap;
124 Node* m_lruListHead;
125 Node* m_lruListTail;
126 };
下面是hashmap+list版本,如果是C++,list和hashmap都是STL自带的功能实现,所以,我们直接应用STL库,代码量大大减少,时间复杂度为O(1).^-^代码参考:dancingrain
#include <iostream>
#include <hash_map>
#include <list>
#include <utility>
using namespace std;
using namespace stdext;
class LRUCache{
public:
LRUCache(int capacity) {
m_capacity = capacity ;
}
int get(int key) {
int retValue = -1 ;
hash_map<int, list<pair<int, int> > :: iterator> ::iterator it = cachesMap.find(key) ;
//如果在Cashe中,将记录移动到链表的最前端
if (it != cachesMap.end())
{
retValue = it ->second->second ;
//移动到最前端
list<pair<int, int> > :: iterator ptrPair = it -> second ;
pair<int, int> tmpPair = *ptrPair ;
caches.erase(ptrPair) ;
caches.push_front(tmpPair) ;
//修改map中的值
cachesMap[key] = caches.begin() ;
}
return retValue ;
}
void set(int key, int value) {
hash_map<int, list<pair<int, int> > :: iterator> ::iterator it = cachesMap.find(key) ;
if (it != cachesMap.end()) //已经存在其中
{
list<pair<int, int> > :: iterator ptrPait = it ->second ;
ptrPait->second = value ;
//移动到最前面
pair<int , int > tmpPair = *ptrPait ;
caches.erase(ptrPait) ;
caches.push_front(tmpPair) ;
//更新map
cachesMap[key] = caches.begin() ;
}
else //不存在其中
{
pair<int , int > tmpPair = make_pair(key, value) ;
if (m_capacity == caches.size()) //已经满
{
int delKey = caches.back().first ;
caches.pop_back() ; //删除最后一个
//删除在map中的相应项
hash_map<int, list<pair<int, int> > :: iterator> ::iterator delIt = cachesMap.find(delKey) ;
cachesMap.erase(delIt) ;
}
caches.push_front(tmpPair) ;
cachesMap[key] = caches.begin() ; //更新map
}
}
private:
int m_capacity ; //cashe的大小
list<pair<int, int> > caches ; //用一个双链表存储cashe的内容
hash_map< int, list<pair<int, int> > :: iterator> cachesMap ; //使用map加快查找的速度
};