View Javadoc

1   /*
2    *  Licensed to the Apache Software Foundation (ASF) under one
3    *  or more contributor license agreements.  See the NOTICE file
4    *  distributed with this work for additional information
5    *  regarding copyright ownership.  The ASF licenses this file
6    *  to you under the Apache License, Version 2.0 (the
7    *  "License"); you may not use this file except in compliance
8    *  with the License.  You may obtain a copy of the License at
9    *
10   *    http://www.apache.org/licenses/LICENSE-2.0
11   *
12   *  Unless required by applicable law or agreed to in writing,
13   *  software distributed under the License is distributed on an
14   *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   *  KIND, either express or implied.  See the License for the
16   *  specific language governing permissions and limitations
17   *  under the License.
18   *
19   */
20  package org.apache.mina.util;
21  
22  import java.security.InvalidParameterException;
23  
/**
 * Provides Base64 encoding and decoding as defined by RFC 2045.
 *
 * <p>This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite>
 * from RFC 2045 <cite>Multipurpose Internet Mail Extensions (MIME) Part One:
 * Format of Internet Message Bodies</cite> by Freed and Borenstein.</p>
 *
 * <p>This class is derived from the Apache Commons Codec Base64 implementation
 * (see the author tags below).</p>
 *
 * <p>All static methods are stateless; the lookup tables are populated once in
 * the static initializer and never modified afterwards.</p>
 *
 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
 *
 * @author Apache Software Foundation commons codec (http://commons.apache.org/codec/)
 * @author The Apache MINA Project (dev@mina.apache.org)
 * @version $Id: $
 */
public class Base64 {

    /**
     * Chunk size per RFC 2045 section 6.8.
     *
     * <p>The {@value} character limit does not count the trailing CRLF, but counts
     * all other characters, including any equal signs.</p>
     *
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
     */
    static final int CHUNK_SIZE = 76;

    /**
     * Chunk separator (CR LF) per RFC 2045 section 2.1.
     *
     * <p>Spelled out as literal bytes so that the value does not depend on the
     * platform default charset.</p>
     *
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
     */
    static final byte[] CHUNK_SEPARATOR = { (byte) '\r', (byte) '\n' };

    /**
     * Size of the decoding table (indexed by character code).
     */
    static final int BASELENGTH = 255;

    /**
     * Size of the encoding table (one entry per 6-bit value).
     */
    static final int LOOKUPLENGTH = 64;

    /**
     * Number of bits in a byte.
     */
    static final int EIGHTBIT = 8;

    /**
     * Number of bits in two bytes.
     */
    static final int SIXTEENBIT = 16;

    /**
     * Number of bits in one full Base64 input group (three bytes).
     */
    static final int TWENTYFOURBITGROUP = 24;

    /**
     * Number of encoded characters in one Base64 group.
     */
    static final int FOURBYTE = 4;

    /**
     * Mask for the sign bit of a byte. Kept for compatibility; the encoder
     * masks with {@code 0xff} instead of testing the sign.
     */
    static final int SIGN = -128;

    /**
     * Byte used to pad output.
     */
    static final byte PAD = (byte) '=';

    /**
     * Decoding table: maps a character code to its 6-bit value, or -1 when
     * the character is not part of the Base64 alphabet.
     */
    private static final byte[] base64Alphabet = new byte[BASELENGTH];

    /**
     * Encoding table: maps a 6-bit value to its Base64 character.
     */
    private static final byte[] lookUpBase64Alphabet = new byte[LOOKUPLENGTH];

    // Populate the decoding and encoding tables.
    static {
        for (int i = 0; i < BASELENGTH; i++) {
            base64Alphabet[i] = (byte) -1;
        }
        for (int i = 'A'; i <= 'Z'; i++) {
            base64Alphabet[i] = (byte) (i - 'A');
        }
        for (int i = 'a'; i <= 'z'; i++) {
            base64Alphabet[i] = (byte) (i - 'a' + 26);
        }
        for (int i = '0'; i <= '9'; i++) {
            base64Alphabet[i] = (byte) (i - '0' + 52);
        }
        base64Alphabet['+'] = 62;
        base64Alphabet['/'] = 63;

        for (int i = 0; i < 26; i++) {
            lookUpBase64Alphabet[i] = (byte) ('A' + i);
            lookUpBase64Alphabet[i + 26] = (byte) ('a' + i);
        }
        for (int i = 0; i < 10; i++) {
            lookUpBase64Alphabet[i + 52] = (byte) ('0' + i);
        }
        lookUpBase64Alphabet[62] = (byte) '+';
        lookUpBase64Alphabet[63] = (byte) '/';
    }

    /**
     * Tells whether a byte is the pad character or a member of the Base64
     * alphabet.
     *
     * @param octect the byte to test
     * @return true for '=' and for any Base64 alphabet character
     */
    private static boolean isBase64(byte octect) {
        if (octect == PAD) {
            return true;
        }
        // Bytes >= 0x80 are negative in Java; they are never Base64 characters
        // and must not be used as a table index.
        return octect >= 0 && base64Alphabet[octect] != -1;
    }

    /**
     * Tests a given byte array to see if it contains only valid characters
     * within the Base64 alphabet. Space, tab, CR and LF are skipped before
     * testing, and the pad character '=' is accepted at any position.
     *
     * @param arrayOctect byte array to test
     * @return true if all bytes are valid characters in the Base64
     *         alphabet or if the byte array is empty; false, otherwise
     */
    public static boolean isArrayByteBase64(byte[] arrayOctect) {
        byte[] candidate = discardWhitespace(arrayOctect);

        // An empty array is considered valid Base64 data.
        for (int i = 0; i < candidate.length; i++) {
            if (!isBase64(candidate[i])) {
                return false;
            }
        }
        return true;
    }

    /**
     * Encodes binary data using the base64 algorithm but
     * does not chunk the output.
     *
     * @param binaryData binary data to encode
     * @return Base64 characters
     */
    public static byte[] encodeBase64(byte[] binaryData) {
        return encodeBase64(binaryData, false);
    }

    /**
     * Encodes binary data using the base64 algorithm and chunks
     * the encoded output into 76 character blocks.
     *
     * @param binaryData binary data to encode
     * @return Base64 characters chunked in 76 character blocks
     */
    public static byte[] encodeBase64Chunked(byte[] binaryData) {
        return encodeBase64(binaryData, true);
    }

    /**
     * Decodes an Object using the base64 algorithm. Only byte[] arguments
     * are supported.
     *
     * @param pObject Object to decode
     * @return An object (of type byte[]) containing the
     *         binary data which corresponds to the byte[] supplied.
     * @throws InvalidParameterException if the parameter supplied is not
     *                          of type byte[] (this includes null)
     */
    public Object decode(Object pObject) {
        if (!(pObject instanceof byte[])) {
            throw new InvalidParameterException(
                    "Parameter supplied to Base64 decode is not a byte[]");
        }
        return decode((byte[]) pObject);
    }

    /**
     * Decodes a byte[] containing characters in the Base64 alphabet.
     *
     * @param pArray A byte array containing Base64 character data
     * @return a byte array containing binary data
     */
    public byte[] decode(byte[] pArray) {
        return decodeBase64(pArray);
    }

    /**
     * Encodes binary data using the base64 algorithm, optionally
     * chunking the output into 76 character blocks.
     *
     * <p>When chunking, every line (including the last, possibly shorter one)
     * is terminated by {@link #CHUNK_SEPARATOR}. Empty input yields an empty
     * array in both modes.</p>
     *
     * @param binaryData Array containing binary data to encode.
     * @param isChunked if isChunked is true this encoder will chunk
     *                  the base64 output into 76 character blocks
     * @return Base64-encoded data.
     * @throws IllegalArgumentException if the encoded form would not fit
     *                  into a single byte array
     */
    public static byte[] encodeBase64(byte[] binaryData, boolean isChunked) {
        // Work in bytes rather than bits so that large inputs cannot overflow.
        int numberTriplets = binaryData.length / 3;
        int leftoverBytes = binaryData.length % 3;

        // Every started group of three input bytes produces four characters.
        long encodedLength = 4L * (numberTriplets + (leftoverBytes == 0 ? 0 : 1));

        // If the output is to be "chunked" into 76 character sections,
        // for compliance with RFC 2045 MIME, allow for the separators.
        if (isChunked && CHUNK_SEPARATOR.length > 0) {
            // Integer ceiling division; float arithmetic loses precision
            // once the length exceeds 2^24.
            long nbrChunks = (encodedLength + CHUNK_SIZE - 1) / CHUNK_SIZE;
            encodedLength += nbrChunks * CHUNK_SEPARATOR.length;
        }

        if (encodedLength > Integer.MAX_VALUE) {
            throw new IllegalArgumentException(
                    "Input array too big, the encoded output would need "
                            + encodedLength + " bytes");
        }

        byte[] encodedData = new byte[(int) encodedLength];
        int encodedIndex = 0;
        int dataIndex = 0;
        // Number of characters written on the current output line.
        int lineLength = 0;

        for (int i = 0; i < numberTriplets; i++) {
            // Mask to 0..255 so the shifts below never see a sign bit.
            int b1 = binaryData[dataIndex++] & 0xff;
            int b2 = binaryData[dataIndex++] & 0xff;
            int b3 = binaryData[dataIndex++] & 0xff;

            encodedData[encodedIndex++] = lookUpBase64Alphabet[b1 >>> 2];
            encodedData[encodedIndex++] = lookUpBase64Alphabet[((b1 & 0x03) << 4)
                    | (b2 >>> 4)];
            encodedData[encodedIndex++] = lookUpBase64Alphabet[((b2 & 0x0f) << 2)
                    | (b3 >>> 6)];
            encodedData[encodedIndex++] = lookUpBase64Alphabet[b3 & 0x3f];
            lineLength += FOURBYTE;

            // CHUNK_SIZE is a multiple of four, so a line can only become
            // full right after a complete group has been written.
            if (isChunked && lineLength == CHUNK_SIZE) {
                System.arraycopy(CHUNK_SEPARATOR, 0, encodedData,
                        encodedIndex, CHUNK_SEPARATOR.length);
                encodedIndex += CHUNK_SEPARATOR.length;
                lineLength = 0;
            }
        }

        // Encode the final one or two bytes and pad to a full group.
        if (leftoverBytes == 1) {
            int b1 = binaryData[dataIndex] & 0xff;

            encodedData[encodedIndex++] = lookUpBase64Alphabet[b1 >>> 2];
            encodedData[encodedIndex++] = lookUpBase64Alphabet[(b1 & 0x03) << 4];
            encodedData[encodedIndex++] = PAD;
            encodedData[encodedIndex++] = PAD;
            lineLength += FOURBYTE;
        } else if (leftoverBytes == 2) {
            int b1 = binaryData[dataIndex] & 0xff;
            int b2 = binaryData[dataIndex + 1] & 0xff;

            encodedData[encodedIndex++] = lookUpBase64Alphabet[b1 >>> 2];
            encodedData[encodedIndex++] = lookUpBase64Alphabet[((b1 & 0x03) << 4)
                    | (b2 >>> 4)];
            encodedData[encodedIndex++] = lookUpBase64Alphabet[(b2 & 0x0f) << 2];
            encodedData[encodedIndex++] = PAD;
            lineLength += FOURBYTE;
        }

        // The last line also gets a separator, unless it was already
        // terminated inside the loop above.
        if (isChunked && lineLength > 0) {
            System.arraycopy(CHUNK_SEPARATOR, 0, encodedData, encodedIndex,
                    CHUNK_SEPARATOR.length);
        }

        return encodedData;
    }

    /**
     * Decodes Base64 data into octects.
     *
     * <p>Bytes outside the Base64 alphabet (line breaks, whitespace, bytes
     * with the high bit set, ...) are ignored. The first pad character '='
     * is taken as the end of the data, as permitted by RFC 2045 section 6.8.
     * A trailing group of two or three characters is decoded to one or two
     * bytes even when its padding is missing; a single dangling character
     * carries fewer than eight bits and is dropped.</p>
     *
     * @param base64Data Byte array containing Base64 data
     * @return Array containing decoded data; empty if there is nothing to
     *         decode.
     */
    public static byte[] decodeBase64(byte[] base64Data) {
        // RFC 2045 requires that we discard ALL non-Base64 characters
        byte[] encoded = discardNonBase64(base64Data);

        // Only the characters before the first '=' carry data.
        int dataLength = 0;
        while (dataLength < encoded.length && encoded[dataLength] != PAD) {
            dataLength++;
        }

        int numberQuadruple = dataLength / FOURBYTE;
        int trailingChars = dataLength % FOURBYTE;

        int decodedLength = numberQuadruple * 3;
        if (trailingChars == 2) {
            decodedLength += 1;
        } else if (trailingChars == 3) {
            decodedLength += 2;
        }

        byte[] decodedData = new byte[decodedLength];
        int decodedIndex = 0;
        int dataIndex = 0;

        for (int i = 0; i < numberQuadruple; i++) {
            int b1 = base64Alphabet[encoded[dataIndex++]];
            int b2 = base64Alphabet[encoded[dataIndex++]];
            int b3 = base64Alphabet[encoded[dataIndex++]];
            int b4 = base64Alphabet[encoded[dataIndex++]];

            decodedData[decodedIndex++] = (byte) ((b1 << 2) | (b2 >> 4));
            decodedData[decodedIndex++] = (byte) (((b2 & 0xf) << 4) | (b3 >> 2));
            decodedData[decodedIndex++] = (byte) ((b3 << 6) | b4);
        }

        if (trailingChars >= 2) {
            int b1 = base64Alphabet[encoded[dataIndex]];
            int b2 = base64Alphabet[encoded[dataIndex + 1]];

            decodedData[decodedIndex++] = (byte) ((b1 << 2) | (b2 >> 4));

            if (trailingChars == 3) {
                int b3 = base64Alphabet[encoded[dataIndex + 2]];

                decodedData[decodedIndex] = (byte) (((b2 & 0xf) << 4) | (b3 >> 2));
            }
        }

        return decodedData;
    }

    /**
     * Discards any whitespace (space, tab, CR, LF) from a base-64 encoded
     * block.
     *
     * @param data The base-64 encoded data to discard the whitespace
     * from.
     * @return A new array holding the data, less whitespace (see RFC 2045).
     */
    static byte[] discardWhitespace(byte[] data) {
        byte[] groomedData = new byte[data.length];
        int bytesCopied = 0;

        for (int i = 0; i < data.length; i++) {
            byte current = data[i];
            boolean whitespace = current == (byte) ' '
                    || current == (byte) '\n'
                    || current == (byte) '\r'
                    || current == (byte) '\t';
            if (!whitespace) {
                groomedData[bytesCopied++] = current;
            }
        }

        byte[] packedData = new byte[bytesCopied];
        System.arraycopy(groomedData, 0, packedData, 0, bytesCopied);
        return packedData;
    }

    /**
     * Discards any characters outside of the base64 alphabet, per
     * the requirements on page 25 of RFC 2045 - "Any characters
     * outside of the base64 alphabet are to be ignored in base64
     * encoded data." The pad character '=' is kept.
     *
     * @param data The base-64 encoded data to groom
     * @return A new array holding the data, less non-base64 characters
     *         (see RFC 2045).
     */
    static byte[] discardNonBase64(byte[] data) {
        byte[] groomedData = new byte[data.length];
        int bytesCopied = 0;

        for (int i = 0; i < data.length; i++) {
            if (isBase64(data[i])) {
                groomedData[bytesCopied++] = data[i];
            }
        }

        byte[] packedData = new byte[bytesCopied];
        System.arraycopy(groomedData, 0, packedData, 0, bytesCopied);
        return packedData;
    }

    /**
     * Encodes an Object using the base64 algorithm. Only byte[] arguments
     * are supported.
     *
     * @param pObject Object to encode
     * @return An object (of type byte[]) containing the
     *         base64 encoded data which corresponds to the byte[] supplied.
     * @throws InvalidParameterException if the parameter supplied is not
     *                          of type byte[] (this includes null)
     */
    public Object encode(Object pObject) {
        if (!(pObject instanceof byte[])) {
            throw new InvalidParameterException(
                    "Parameter supplied to Base64 encode is not a byte[]");
        }
        return encode((byte[]) pObject);
    }

    /**
     * Encodes a byte[] containing binary data, into a byte[] containing
     * characters in the Base64 alphabet. The output is not chunked.
     *
     * @param pArray a byte array containing binary data
     * @return A byte array containing only Base64 character data
     */
    public byte[] encode(byte[] pArray) {
        return encodeBase64(pArray, false);
    }

}