1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.regionserver.snapshot;
19
20 import java.io.IOException;
21 import java.util.ArrayList;
22 import java.util.Collection;
23 import java.util.List;
24 import java.util.concurrent.Callable;
25 import java.util.concurrent.ExecutionException;
26 import java.util.concurrent.ExecutorCompletionService;
27 import java.util.concurrent.Future;
28 import java.util.concurrent.LinkedBlockingQueue;
29 import java.util.concurrent.ThreadPoolExecutor;
30 import java.util.concurrent.TimeUnit;
31
32 import org.apache.commons.logging.Log;
33 import org.apache.commons.logging.LogFactory;
34 import org.apache.hadoop.hbase.classification.InterfaceAudience;
35 import org.apache.hadoop.hbase.classification.InterfaceStability;
36 import org.apache.hadoop.conf.Configuration;
37 import org.apache.hadoop.hbase.DaemonThreadFactory;
38 import org.apache.hadoop.hbase.HBaseInterfaceAudience;
39 import org.apache.hadoop.hbase.TableName;
40 import org.apache.hadoop.hbase.errorhandling.ForeignException;
41 import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
42 import org.apache.hadoop.hbase.master.snapshot.MasterSnapshotVerifier;
43 import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
44 import org.apache.hadoop.hbase.procedure.ProcedureMember;
45 import org.apache.hadoop.hbase.procedure.ProcedureMemberRpcs;
46 import org.apache.hadoop.hbase.procedure.RegionServerProcedureManager;
47 import org.apache.hadoop.hbase.procedure.Subprocedure;
48 import org.apache.hadoop.hbase.procedure.SubprocedureFactory;
49 import org.apache.hadoop.hbase.procedure.ZKProcedureMemberRpcs;
50 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
51 import org.apache.hadoop.hbase.regionserver.HRegion;
52 import org.apache.hadoop.hbase.regionserver.HRegionServer;
53 import org.apache.hadoop.hbase.regionserver.RegionServerServices;
54 import org.apache.hadoop.hbase.snapshot.SnapshotCreationException;
55 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
56 import org.apache.zookeeper.KeeperException;
57
58 import com.google.protobuf.InvalidProtocolBufferException;
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
74 @InterfaceStability.Unstable
75 public class RegionServerSnapshotManager extends RegionServerProcedureManager {
76 private static final Log LOG = LogFactory.getLog(RegionServerSnapshotManager.class);
77
78
79 private static final String CONCURENT_SNAPSHOT_TASKS_KEY = "hbase.snapshot.region.concurrentTasks";
80 private static final int DEFAULT_CONCURRENT_SNAPSHOT_TASKS = 3;
81
82
83 public static final String SNAPSHOT_REQUEST_THREADS_KEY = "hbase.snapshot.region.pool.threads";
84
85 public static final int SNAPSHOT_REQUEST_THREADS_DEFAULT = 10;
86
87
88 public static final String SNAPSHOT_TIMEOUT_MILLIS_KEY = "hbase.snapshot.region.timeout";
89
90 public static final long SNAPSHOT_TIMEOUT_MILLIS_DEFAULT = 60000;
91
92
93 public static final String SNAPSHOT_REQUEST_WAKE_MILLIS_KEY = "hbase.snapshot.region.wakefrequency";
94
95 private static final long SNAPSHOT_REQUEST_WAKE_MILLIS_DEFAULT = 500;
96
97 private RegionServerServices rss;
98 private ProcedureMemberRpcs memberRpcs;
99 private ProcedureMember member;
100
101
102
103
104
105
106
107
/**
 * Builds a manager from already-constructed collaborators, bypassing
 * {@link #initialize(RegionServerServices)}.
 *
 * @param conf accepted as part of the signature; this constructor does not read it
 * @param parent region server that hosts this manager
 * @param memberRpc member RPC handle that {@link #start()} starts and {@link #stop(boolean)}
 *          closes
 * @param procMember procedure member handed to the RPC handle on start and closed on stop
 */
RegionServerSnapshotManager(Configuration conf, HRegionServer parent,
    ProcedureMemberRpcs memberRpc, ProcedureMember procMember) {
  rss = parent;
  member = procMember;
  memberRpcs = memberRpc;
}
114
115 public RegionServerSnapshotManager() {}
116
117
118
119
120 @Override
121 public void start() {
122 LOG.debug("Start Snapshot Manager " + rss.getServerName().toString());
123 this.memberRpcs.start(rss.getServerName().toString(), member);
124 }
125
126
127
128
129
130
131 @Override
132 public void stop(boolean force) throws IOException {
133 String mode = force ? "abruptly" : "gracefully";
134 LOG.info("Stopping RegionServerSnapshotManager " + mode + ".");
135
136 try {
137 this.member.close();
138 } finally {
139 this.memberRpcs.close();
140 }
141 }
142
143
144
145
146
147
148
149
150
151
152
153 public Subprocedure buildSubprocedure(SnapshotDescription snapshot) {
154
155
156 if (rss.isStopping() || rss.isStopped()) {
157 throw new IllegalStateException("Can't start snapshot on RS: " + rss.getServerName()
158 + ", because stopping/stopped!");
159 }
160
161
162
163 List<HRegion> involvedRegions;
164 try {
165 involvedRegions = getRegionsToSnapshot(snapshot);
166 } catch (IOException e1) {
167 throw new IllegalStateException("Failed to figure out if we should handle a snapshot - "
168 + "something has gone awry with the online regions.", e1);
169 }
170
171
172
173
174
175 LOG.debug("Launching subprocedure for snapshot " + snapshot.getName() + " from table "
176 + snapshot.getTable());
177 ForeignExceptionDispatcher exnDispatcher = new ForeignExceptionDispatcher(snapshot.getName());
178 Configuration conf = rss.getConfiguration();
179 long timeoutMillis = conf.getLong(SNAPSHOT_TIMEOUT_MILLIS_KEY,
180 SNAPSHOT_TIMEOUT_MILLIS_DEFAULT);
181 long wakeMillis = conf.getLong(SNAPSHOT_REQUEST_WAKE_MILLIS_KEY,
182 SNAPSHOT_REQUEST_WAKE_MILLIS_DEFAULT);
183
184 switch (snapshot.getType()) {
185 case FLUSH:
186 SnapshotSubprocedurePool taskManager =
187 new SnapshotSubprocedurePool(rss.getServerName().toString(), conf);
188 return new FlushSnapshotSubprocedure(member, exnDispatcher, wakeMillis,
189 timeoutMillis, involvedRegions, snapshot, taskManager);
190 case SKIPFLUSH:
191
192
193
194
195
196
197
198 SnapshotSubprocedurePool taskManager2 =
199 new SnapshotSubprocedurePool(rss.getServerName().toString(), conf);
200 return new FlushSnapshotSubprocedure(member, exnDispatcher, wakeMillis,
201 timeoutMillis, involvedRegions, snapshot, taskManager2);
202
203 default:
204 throw new UnsupportedOperationException("Unrecognized snapshot type:" + snapshot.getType());
205 }
206 }
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223 private List<HRegion> getRegionsToSnapshot(SnapshotDescription snapshot) throws IOException {
224 return rss.getOnlineRegions(TableName.valueOf(snapshot.getTable()));
225 }
226
227
228
229
230 public class SnapshotSubprocedureBuilder implements SubprocedureFactory {
231
232 @Override
233 public Subprocedure buildSubprocedure(String name, byte[] data) {
234 try {
235
236 SnapshotDescription snapshot = SnapshotDescription.parseFrom(data);
237 return RegionServerSnapshotManager.this.buildSubprocedure(snapshot);
238 } catch (InvalidProtocolBufferException e) {
239 throw new IllegalArgumentException("Could not read snapshot information from request.");
240 }
241 }
242
243 }
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258 static class SnapshotSubprocedurePool {
259 private final ExecutorCompletionService<Void> taskPool;
260 private final ThreadPoolExecutor executor;
261 private volatile boolean stopped;
262 private final List<Future<Void>> futures = new ArrayList<Future<Void>>();
263 private final String name;
264
265 SnapshotSubprocedurePool(String name, Configuration conf) {
266
267 long keepAlive = conf.getLong(
268 RegionServerSnapshotManager.SNAPSHOT_TIMEOUT_MILLIS_KEY,
269 RegionServerSnapshotManager.SNAPSHOT_TIMEOUT_MILLIS_DEFAULT);
270 int threads = conf.getInt(CONCURENT_SNAPSHOT_TASKS_KEY, DEFAULT_CONCURRENT_SNAPSHOT_TASKS);
271 this.name = name;
272 executor = new ThreadPoolExecutor(1, threads, keepAlive, TimeUnit.MILLISECONDS,
273 new LinkedBlockingQueue<Runnable>(), new DaemonThreadFactory("rs("
274 + name + ")-snapshot-pool"));
275 taskPool = new ExecutorCompletionService<Void>(executor);
276 }
277
278 boolean hasTasks() {
279 return futures.size() != 0;
280 }
281
282
283
284
285
286
287
288 void submitTask(final Callable<Void> task) {
289 Future<Void> f = this.taskPool.submit(task);
290 futures.add(f);
291 }
292
293
294
295
296
297
298
299
300
301 boolean waitForOutstandingTasks() throws ForeignException, InterruptedException {
302 LOG.debug("Waiting for local region snapshots to finish.");
303
304 int sz = futures.size();
305 try {
306
307 for (int i = 0; i < sz; i++) {
308 Future<Void> f = taskPool.take();
309 f.get();
310 if (!futures.remove(f)) {
311 LOG.warn("unexpected future" + f);
312 }
313 LOG.debug("Completed " + (i+1) + "/" + sz + " local region snapshots.");
314 }
315 LOG.debug("Completed " + sz + " local region snapshots.");
316 return true;
317 } catch (InterruptedException e) {
318 LOG.warn("Got InterruptedException in SnapshotSubprocedurePool", e);
319 if (!stopped) {
320 Thread.currentThread().interrupt();
321 throw new ForeignException("SnapshotSubprocedurePool", e);
322 }
323
324 } catch (ExecutionException e) {
325 if (e.getCause() instanceof ForeignException) {
326 LOG.warn("Rethrowing ForeignException from SnapshotSubprocedurePool", e);
327 throw (ForeignException)e.getCause();
328 }
329 LOG.warn("Got Exception in SnapshotSubprocedurePool", e);
330 throw new ForeignException(name, e.getCause());
331 } finally {
332 cancelTasks();
333 }
334 return false;
335 }
336
337
338
339
340
341 void cancelTasks() throws InterruptedException {
342 Collection<Future<Void>> tasks = futures;
343 LOG.debug("cancelling " + tasks.size() + " tasks for snapshot " + name);
344 for (Future<Void> f: tasks) {
345
346
347
348
349 f.cancel(false);
350 }
351
352
353 futures.clear();
354 while (taskPool.poll() != null) {}
355 stop();
356 }
357
358
359
360
361 void stop() {
362 if (this.stopped) return;
363
364 this.stopped = true;
365 this.executor.shutdownNow();
366 }
367 }
368
369
370
371
372
373
374 @Override
375 public void initialize(RegionServerServices rss) throws KeeperException {
376 this.rss = rss;
377 ZooKeeperWatcher zkw = rss.getZooKeeper();
378 this.memberRpcs = new ZKProcedureMemberRpcs(zkw,
379 SnapshotManager.ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION);
380
381
382 Configuration conf = rss.getConfiguration();
383 long keepAlive = conf.getLong(SNAPSHOT_TIMEOUT_MILLIS_KEY, SNAPSHOT_TIMEOUT_MILLIS_DEFAULT);
384 int opThreads = conf.getInt(SNAPSHOT_REQUEST_THREADS_KEY, SNAPSHOT_REQUEST_THREADS_DEFAULT);
385
386
387 ThreadPoolExecutor pool = ProcedureMember.defaultPool(rss.getServerName().toString(),
388 opThreads, keepAlive);
389 this.member = new ProcedureMember(memberRpcs, pool, new SnapshotSubprocedureBuilder());
390 }
391
392 @Override
393 public String getProcedureSignature() {
394 return SnapshotManager.ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION;
395 }
396
397 }