1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.catalog;
19
20 import com.google.common.annotations.VisibleForTesting;
21 import com.google.common.base.Stopwatch;
22
23 import org.apache.commons.logging.Log;
24 import org.apache.commons.logging.LogFactory;
25 import org.apache.hadoop.hbase.classification.InterfaceAudience;
26 import org.apache.hadoop.conf.Configuration;
27 import org.apache.hadoop.hbase.Abortable;
28 import org.apache.hadoop.hbase.HRegionInfo;
29 import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException;
30 import org.apache.hadoop.hbase.ServerName;
31 import org.apache.hadoop.hbase.client.HConnection;
32 import org.apache.hadoop.hbase.client.HConnectionManager;
33 import org.apache.hadoop.hbase.client.HTable;
34 import org.apache.hadoop.hbase.client.RetriesExhaustedException;
35 import org.apache.hadoop.hbase.ipc.RpcClient.FailedServerException;
36 import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
37 import org.apache.hadoop.hbase.master.RegionState;
38 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
39 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService;
40 import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
41 import org.apache.hadoop.hbase.util.Bytes;
42 import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
43 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
44 import org.apache.hadoop.ipc.RemoteException;
45
46 import java.io.EOFException;
47 import java.io.IOException;
48 import java.net.ConnectException;
49 import java.net.NoRouteToHostException;
50 import java.net.SocketException;
51 import java.net.SocketTimeoutException;
52 import java.net.UnknownHostException;
53
54
55
56
57
58
59
60
61
62
63
64
65 @InterfaceAudience.Private
66 public class CatalogTracker {
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112 private static final Log LOG = LogFactory.getLog(CatalogTracker.class);
113 private final HConnection connection;
114 private final ZooKeeperWatcher zookeeper;
115 private final MetaRegionTracker metaRegionTracker;
116 private boolean instantiatedzkw = false;
117 private Abortable abortable;
118
119 private volatile boolean stopped = false;
120
121 static final byte [] META_REGION_NAME =
122 HRegionInfo.FIRST_META_REGIONINFO.getRegionName();
123
124
125
126
127
128
129
130
131
132
133
134
135 public CatalogTracker(final Configuration conf) throws IOException {
136 this(null, conf, null);
137 }
138
139
140
141
142
143
144
145
146
147
148
149
150
151 public CatalogTracker(final ZooKeeperWatcher zk, final Configuration conf,
152 Abortable abortable)
153 throws IOException {
154 this(zk, conf, HConnectionManager.getConnection(conf), abortable);
155 }
156
157 public CatalogTracker(final ZooKeeperWatcher zk, final Configuration conf,
158 HConnection connection, Abortable abortable)
159 throws IOException {
160 this.connection = connection;
161 if (abortable == null) {
162
163 this.abortable = this.connection;
164 }
165 Abortable throwableAborter = new Abortable() {
166
167 @Override
168 public void abort(String why, Throwable e) {
169 throw new RuntimeException(why, e);
170 }
171
172 @Override
173 public boolean isAborted() {
174 return true;
175 }
176
177 };
178 if (zk == null) {
179
180 this.zookeeper =
181 new ZooKeeperWatcher(conf, "catalogtracker-on-" + connection.toString(),
182 abortable);
183 instantiatedzkw = true;
184 } else {
185 this.zookeeper = zk;
186 }
187 this.metaRegionTracker = new MetaRegionTracker(zookeeper, throwableAborter);
188 }
189
190
191
192
193
194
195
196
197 public void start() throws IOException, InterruptedException {
198 LOG.debug("Starting catalog tracker " + this);
199 try {
200 this.metaRegionTracker.start();
201 } catch (RuntimeException e) {
202 Throwable t = e.getCause();
203 this.abortable.abort(e.getMessage(), t);
204 throw new IOException("Attempt to start meta tracker failed.", t);
205 }
206 }
207
208
209
210
211 @VisibleForTesting
212 public boolean isStopped() {
213 return this.stopped;
214 }
215
216
217
218
219
220 public void stop() {
221 if (!this.stopped) {
222 LOG.debug("Stopping catalog tracker " + this);
223 this.stopped = true;
224 this.metaRegionTracker.stop();
225 try {
226 if (this.connection != null) {
227 this.connection.close();
228 }
229 } catch (IOException e) {
230
231
232 LOG.error("Attempt to close catalog tracker's connection failed.", e);
233 }
234 if (this.instantiatedzkw) {
235 this.zookeeper.close();
236 }
237 }
238 }
239
240
241
242
243
244
245
246
247 public ServerName getMetaLocation() throws InterruptedException {
248 return this.metaRegionTracker.getMetaRegionLocation();
249 }
250
251
252
253
254
255 public boolean isMetaLocationAvailable() {
256 return this.metaRegionTracker.isLocationAvailable();
257 }
258
259
260
261
262
263
264
265
266
267
268
269 public ServerName waitForMeta(final long timeout)
270 throws InterruptedException, NotAllMetaRegionsOnlineException {
271 ServerName sn = metaRegionTracker.waitMetaRegionLocation(timeout);
272 if (sn == null) {
273 throw new NotAllMetaRegionsOnlineException("Timed out; " + timeout + "ms");
274 }
275 return sn;
276 }
277
278
279
280
281
282 public RegionState getMetaRegionState() {
283 return metaRegionTracker.getMetaRegionState();
284 }
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299 public AdminService.BlockingInterface waitForMetaServerConnection(long timeout)
300 throws InterruptedException, NotAllMetaRegionsOnlineException, IOException {
301 return getMetaServerConnection(timeout);
302 }
303
304
305
306
307
308
309
310
311
312
313
314
315 AdminService.BlockingInterface getMetaServerConnection(long timeout)
316 throws InterruptedException, NotAllMetaRegionsOnlineException, IOException {
317 return getCachedConnection(waitForMeta(timeout));
318 }
319
320
321
322
323
324
325
326
327 public void waitForMeta() throws InterruptedException {
328 Stopwatch stopwatch = new Stopwatch().start();
329 while (!this.stopped) {
330 try {
331 if (waitForMeta(100) != null) break;
332 long sleepTime = stopwatch.elapsedMillis();
333
334 if ((sleepTime + 1) % 10000 == 0) {
335 LOG.warn("Have been waiting for meta to be assigned for " + sleepTime + "ms");
336 }
337 } catch (NotAllMetaRegionsOnlineException e) {
338 if (LOG.isTraceEnabled()) {
339 LOG.trace("hbase:meta still not available, sleeping and retrying." +
340 " Reason: " + e.getMessage());
341 }
342 }
343 }
344 }
345
346
347
348
349
350
351
352
353 private AdminService.BlockingInterface getCachedConnection(ServerName sn)
354 throws IOException {
355 if (sn == null) {
356 return null;
357 }
358 AdminService.BlockingInterface service = null;
359 try {
360 service = connection.getAdmin(sn);
361 } catch (RetriesExhaustedException e) {
362 if (e.getCause() != null && e.getCause() instanceof ConnectException) {
363
364 } else {
365 throw e;
366 }
367 } catch (SocketTimeoutException e) {
368 LOG.debug("Timed out connecting to " + sn);
369 } catch (NoRouteToHostException e) {
370 LOG.debug("Connecting to " + sn, e);
371 } catch (SocketException e) {
372 LOG.debug("Exception connecting to " + sn);
373 } catch (UnknownHostException e) {
374 LOG.debug("Unknown host exception connecting to " + sn);
375 } catch (FailedServerException e) {
376 if (LOG.isDebugEnabled()) {
377 LOG.debug("Server " + sn + " is in failed server list.");
378 }
379 } catch (IOException ioe) {
380 Throwable cause = ioe.getCause();
381 if (ioe instanceof ConnectException) {
382
383 } else if (cause != null && cause instanceof EOFException) {
384
385 } else if (cause != null && cause.getMessage() != null &&
386 cause.getMessage().toLowerCase().contains("connection reset")) {
387
388 } else {
389 throw ioe;
390 }
391
392 }
393 return service;
394 }
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411 private boolean verifyRegionLocation(AdminService.BlockingInterface hostingServer,
412 final ServerName address, final byte [] regionName)
413 throws IOException {
414 if (hostingServer == null) {
415 LOG.info("Passed hostingServer is null");
416 return false;
417 }
418 Throwable t = null;
419 try {
420
421 return ProtobufUtil.getRegionInfo(hostingServer, regionName) != null;
422 } catch (ConnectException e) {
423 t = e;
424 } catch (RetriesExhaustedException e) {
425 t = e;
426 } catch (RemoteException e) {
427 IOException ioe = e.unwrapRemoteException();
428 t = ioe;
429 } catch (IOException e) {
430 Throwable cause = e.getCause();
431 if (cause != null && cause instanceof EOFException) {
432 t = cause;
433 } else if (cause != null && cause.getMessage() != null
434 && cause.getMessage().contains("Connection reset")) {
435 t = cause;
436 } else {
437 t = e;
438 }
439 }
440 LOG.info("Failed verification of " + Bytes.toStringBinary(regionName) +
441 " at address=" + address + ", exception=" + t);
442 return false;
443 }
444
445
446
447
448
449
450
451
452
453 public boolean verifyMetaRegionLocation(final long timeout)
454 throws InterruptedException, IOException {
455 AdminService.BlockingInterface service = null;
456 try {
457 service = waitForMetaServerConnection(timeout);
458 } catch (NotAllMetaRegionsOnlineException e) {
459
460 } catch (ServerNotRunningYetException e) {
461
462 } catch (UnknownHostException e) {
463
464 } catch (RegionServerStoppedException e) {
465
466 }
467 return (service == null)? false:
468 verifyRegionLocation(service,
469 this.metaRegionTracker.getMetaRegionLocation(), META_REGION_NAME);
470 }
471
472 public HConnection getConnection() {
473 return this.connection;
474 }
475 }