1 package org.apache.jcs.auxiliary.remote;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import org.apache.commons.logging.Log;
23 import org.apache.commons.logging.LogFactory;
24 import org.apache.jcs.engine.CacheConstants;
25 import org.apache.jcs.engine.behavior.ICache;
26 import org.apache.jcs.engine.behavior.ICompositeCacheManager;
27
28 /***
29 * The RemoteCacheFailoverRunner tries to establish a connection with a failover
30 * server, if any are defined. Once a failover connectin is made, it will
31 * attempt to replace the failover with the primary remote server.
32 * <p>
33 * It works by switching out the RemoteCacheNoWait inside the Facade.
34 * <p>
35 * Client (i.e.) the CompositeCache has refernce to a RemoteCacheNoWaitFacade.
36 * This facade is created by the RemoteCacheFactory. The factory maintains a set
37 * of managers, one for each remote server. Typically, there will only be one
38 * manager.
39 * <p>
40 * If you use multipleremote servesr, you may want to set one or more as
41 * failovers. If a local cache cannot connect to the primary server, or looses
42 * its connection to the primary server, it will attempt to restore that
43 * connectin in the background. If failovers are defined, the Failover runner
44 * will try to connect to a failover until the primary is restored.
45 *
46 */
47 public class RemoteCacheFailoverRunner
48 implements Runnable
49 {
50 private final static Log log = LogFactory.getLog( RemoteCacheFailoverRunner.class );
51
52 private RemoteCacheNoWaitFacade facade;
53
54 private static long idlePeriod = 20 * 1000;
55
56 private boolean alright = true;
57
58 private ICompositeCacheManager cacheMgr;
59
60 /***
61 * Constructor for the RemoteCacheFailoverRunner object. This allows the
62 * FailoverRunner to modify the facade that the CompositeCache references.
63 *
64 * @param facade
65 * the facade the CompositeCache talks to.
66 * @param cacheMgr
67 */
68 public RemoteCacheFailoverRunner( RemoteCacheNoWaitFacade facade, ICompositeCacheManager cacheMgr )
69 {
70 this.facade = facade;
71 this.cacheMgr = cacheMgr;
72 }
73
74 /***
75 * Notifies the cache monitor that an error occurred, and kicks off the
76 * error recovery process.
77 */
78 public void notifyError()
79 {
80 bad();
81 synchronized ( this )
82 {
83 notify();
84 }
85 }
86
87 /***
88 * Main processing method for the RemoteCacheFailoverRunner object.
89 * <p>
90 * If we do not have a connection with any failover server, this will try to
91 * connect one at a time. If no connection can be made, it goes to sleep for
92 * a while (20 seconds).
93 * <p>
94 * Once a connection with a failover is made, we will try to reconnect to
95 * the primary server.
96 * <p>
97 * The primary server is the first server defines in the FailoverServers
98 * list.
99 */
100 public void run()
101 {
102
103
104 connectAndRestore();
105
106 if ( log.isInfoEnabled() )
107 {
108 log.info( "Exiting failover runner. Failover index = " + facade.remoteCacheAttributes.getFailoverIndex() );
109 if ( facade.remoteCacheAttributes.getFailoverIndex() <= 0 )
110 {
111 log.info( "Failover index is <= 0, meaning we are not " + "connected to a failover server." );
112 }
113 else if ( facade.remoteCacheAttributes.getFailoverIndex() > 0 )
114 {
115 log.info( "Failover index is > 0, meaning we are " + "connected to a failover server." );
116 }
117
118 }
119 return;
120 }
121
122 /***
123 * This is the main loop. If there are failovers defined, then this will
124 * continue until the primary is re-connected. If no failovers are defined,
125 * this will exit automatically.
126 */
127 private void connectAndRestore()
128 {
129 do
130 {
131 log.info( "Remote cache FAILOVER RUNNING." );
132
133
134 if ( !alright )
135 {
136
137
138 String[] failovers = facade.remoteCacheAttributes.getFailovers();
139
140
141
142
143 if ( failovers == null )
144 {
145 log.warn( "Remote is misconfigured, failovers was null." );
146 return;
147 }
148 else if ( failovers.length == 1 )
149 {
150
151 if ( log.isInfoEnabled() )
152 {
153 log.info( "No failovers defined, exiting failover runner." );
154 return;
155 }
156 }
157
158 int fidx = facade.remoteCacheAttributes.getFailoverIndex();
159 log.debug( "fidx = " + fidx + " failovers.length = " + failovers.length );
160
161
162
163
164
165 int i = fidx;
166 if ( log.isDebugEnabled() )
167 {
168 log.debug( "stating at failover i = " + i );
169 }
170
171
172 for ( ; i < failovers.length && !alright; i++ )
173 {
174 String server = failovers[i];
175 if ( log.isDebugEnabled() )
176 {
177 log.debug( "Trying server [" + server + "] at failover index i = " + i );
178 }
179
180 RemoteCacheAttributes rca = null;
181 try
182 {
183 rca = (RemoteCacheAttributes) facade.remoteCacheAttributes.copy();
184 rca.setRemoteHost( server.substring( 0, server.indexOf( ":" ) ) );
185 rca.setRemotePort( Integer.parseInt( server.substring( server.indexOf( ":" ) + 1 ) ) );
186 RemoteCacheManager rcm = RemoteCacheManager.getInstance( rca, cacheMgr );
187
188 if ( log.isDebugEnabled() )
189 {
190 log.debug( "RemoteCacheAttributes for failover = " + rca.toString() );
191 }
192
193
194
195 ICache ic = rcm.getCache( rca.getCacheName() );
196 if ( ic != null )
197 {
198 if ( ic.getStatus() == CacheConstants.STATUS_ALIVE )
199 {
200
201 log.debug( "reseting no wait" );
202 facade.noWaits = new RemoteCacheNoWait[1];
203 facade.noWaits[0] = (RemoteCacheNoWait) ic;
204 facade.remoteCacheAttributes.setFailoverIndex( i );
205
206 synchronized ( this )
207 {
208 if ( log.isDebugEnabled() )
209 {
210 log.debug( "setting ALRIGHT to true" );
211 if ( i > 0 )
212 {
213 log.debug( "Moving to Primary Recovery Mode, failover index = " + i );
214 }
215 else
216 {
217 if ( log.isInfoEnabled() )
218 {
219 String message = "No need to connect to failover, the primary server is back up.";
220 log.info( message );
221 }
222 }
223 }
224
225 alright = true;
226
227 if ( log.isInfoEnabled() )
228 {
229 log.info( "CONNECTED to host = [" + rca.getRemoteHost() + "] port = ["
230 + rca.getRemotePort() + "]" );
231 }
232 }
233 }
234 }
235 else
236 {
237 log.info( "noWait is null" );
238 }
239 }
240 catch ( Exception ex )
241 {
242 bad();
243
244
245
246 if ( i == 0 )
247 {
248 log.warn( "FAILED to connect, as expected, to primary" + rca.getRemoteHost() + ":"
249 + rca.getRemotePort(), ex );
250 }
251 else
252 {
253 log.error( "FAILED to connect to failover [" + rca.getRemoteHost() + ":"
254 + rca.getRemotePort() + "]", ex );
255 }
256 }
257 }
258 }
259
260
261
262 else
263 {
264 if ( log.isDebugEnabled() )
265 {
266 log.debug( "ALRIGHT is true " );
267 }
268 if ( log.isInfoEnabled() )
269 {
270 log.info( "Failover runner is in primary recovery mode. Failover index = "
271 + facade.remoteCacheAttributes.getFailoverIndex() + "\n" + "Will now try to reconnect to primary server." );
272 }
273 }
274
275 boolean primaryRestoredSuccessfully = false;
276
277 if ( facade.remoteCacheAttributes.getFailoverIndex() > 0 )
278 {
279 primaryRestoredSuccessfully = restorePrimary();
280 if ( log.isDebugEnabled() )
281 {
282 log.debug( "Primary recovery success state = " + primaryRestoredSuccessfully );
283 }
284 }
285
286 if ( !primaryRestoredSuccessfully )
287 {
288
289
290 try
291 {
292 log.warn( "Failed to reconnect to primary server. Cache failover runner is going to sleep for "
293 + idlePeriod + " milliseconds." );
294 Thread.sleep( idlePeriod );
295 }
296 catch ( InterruptedException ex )
297 {
298
299 }
300 }
301
302
303 }
304 while ( facade.remoteCacheAttributes.getFailoverIndex() > 0 || !alright );
305
306
307 }
308
309 /***
310 * Try to restore the primary server.
311 * <p>
312 * Once primary is restored the failover listener must be deregistered.
313 * <p>
314 * The primary server is the first server defines in the FailoverServers
315 * list.
316 *
317 * @return boolean value indicating whether the resoration was successful
318 */
319 private boolean restorePrimary()
320 {
321
322 String[] failovers = facade.remoteCacheAttributes.getFailovers();
323 String server = failovers[0];
324
325 if ( log.isInfoEnabled() )
326 {
327 log.info( "Trying to restore connection to primary remote server [" + server + "]" );
328 }
329
330 try
331 {
332 RemoteCacheAttributes rca = (RemoteCacheAttributes) facade.remoteCacheAttributes.copy();
333 rca.setRemoteHost( server.substring( 0, server.indexOf( ":" ) ) );
334 rca.setRemotePort( Integer.parseInt( server.substring( server.indexOf( ":" ) + 1 ) ) );
335 RemoteCacheManager rcm = RemoteCacheManager.getInstance( rca, cacheMgr );
336
337
338
339 ICache ic = rcm.getCache( rca.getCacheName() );
340
341
342
343
344
345
346
347 if ( ic != null )
348 {
349 if ( ic.getStatus() == CacheConstants.STATUS_ALIVE )
350 {
351 try
352 {
353
354
355
356
357 if ( facade.noWaits[0] != null && facade.noWaits[0].getStatus() == CacheConstants.STATUS_ALIVE )
358 {
359 int fidx = facade.remoteCacheAttributes.getFailoverIndex();
360
361 if ( fidx > 0 )
362 {
363 String serverOld = failovers[fidx];
364
365 if ( log.isDebugEnabled() )
366 {
367 log.debug( "Failover Index = " + fidx + " the server at that index is ["
368 + serverOld + "]" );
369 }
370
371 if ( serverOld != null )
372 {
373
374
375 RemoteCacheAttributes rcaOld = (RemoteCacheAttributes) facade.remoteCacheAttributes.copy();
376 rcaOld.setRemoteHost( serverOld.substring( 0, serverOld.indexOf( ":" ) ) );
377 rcaOld.setRemotePort( Integer.parseInt( serverOld.substring( serverOld
378 .indexOf( ":" ) + 1 ) ) );
379 RemoteCacheManager rcmOld = RemoteCacheManager.getInstance( rcaOld, cacheMgr );
380
381 if ( rcmOld != null )
382 {
383
384
385 rcmOld.removeRemoteCacheListener( rcaOld );
386 }
387 if ( log.isInfoEnabled() )
388 {
389 log.info( "Successfully deregistered from FAILOVER remote server = "
390 + serverOld );
391 }
392 }
393 }
394 else if ( fidx == 0 )
395 {
396
397
398 if ( log.isDebugEnabled() )
399 {
400 log.debug( "No need to restore primary, it is already restored." );
401 return true;
402 }
403 }
404 else if ( fidx < 0 )
405 {
406
407 log.warn( "Failover index is less than 0, this shouldn't happen" );
408 }
409 }
410 }
411 catch ( Exception e )
412 {
413
414 log.error(
415 "Trouble trying to deregister old failover listener prior to restoring the primary = "
416 + server, e );
417 }
418
419
420
421 RemoteCacheNoWait failoverNoWait = facade.noWaits[0];
422
423
424 facade.noWaits = new RemoteCacheNoWait[1];
425 facade.noWaits[0] = (RemoteCacheNoWait) ic;
426 facade.remoteCacheAttributes.setFailoverIndex( 0 );
427
428 if ( log.isInfoEnabled() )
429 {
430 log.info( "Successfully reconnected to PRIMARY remote server. Substituted primary for failoverNoWait [" + failoverNoWait + "]" );
431 }
432 return true;
433 }
434
435
436
437
438
439 if ( log.isDebugEnabled() )
440 {
441 log.debug( "Primary server status in error, not connected." );
442 }
443 }
444 else
445 {
446 if ( log.isDebugEnabled() )
447 {
448 log.debug( "Primary server is null, not connected." );
449 }
450 }
451 }
452 catch ( Exception ex )
453 {
454 log.error( ex );
455 }
456 return false;
457 }
458
459 /***
460 * Sets the "alright" flag to false in a critial section. This flag
461 * indicates whether or not we are connected to any server at all. If we are
462 * connected to a secondary server, then alright will be true, but we will
463 * continue to try to restore the connetion with the primary server.
464 * <p>
465 * The primary server is the first server defines in the FailoverServers
466 * list.
467 */
468 private void bad()
469 {
470 if ( alright )
471 {
472 synchronized ( this )
473 {
474 alright = false;
475 }
476 }
477 }
478 }