View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.omid.committable.hbase;
19  
20  import com.google.common.base.Preconditions;
21  import org.apache.commons.lang.ArrayUtils;
22  import org.apache.hadoop.conf.Configuration;
23  import org.apache.hadoop.hbase.util.Bytes;
24  
25  import java.io.IOException;
26  import java.util.Arrays;
27  
28  /**
29   * This class contains only the required behavior of the original
30   * org.apache.hadoop.hbase.util.RegionSplitter class to avoid
31   * having a reference to hbase-testing-util, which transitively
32   * imports hbase-server causing dependency conflicts for this module.
33   */
34  public class RegionSplitter {
35  
36      /**
37       * A generic interface for the RegionSplitter code to use for all it's functionality. Note that the original authors
38       * of this code use {@link HexStringSplit} to partition their table and set it as default, but provided this for
39       * your custom algorithm. To use, create a new derived class from this interface and call
40       * {@link RegionSplitter#createPresplitTable} or
41       * {@link RegionSplitter#rollingSplit(String, SplitAlgorithm, Configuration)} with the argument splitClassName
42       * giving the name of your class.
43       */
44      public interface SplitAlgorithm {
45          /**
46           * Split a pre-existing region into 2 regions.
47           *
48           * @param start
49           *            first row (inclusive)
50           * @param end
51           *            last row (exclusive)
52           * @return the split row to use
53           */
54          byte[] split(byte[] start, byte[] end);
55  
56          /**
57           * Split an entire table.
58           *
59           * @param numRegions
60           *            number of regions to split the table into
61           *
62           * @throws RuntimeException
63           *             user input is validated at this time. may throw a runtime exception in response to a parse
64           *             failure
65           * @return array of split keys for the initial regions of the table. The length of the returned array should be
66           *         numRegions-1.
67           */
68          byte[][] split(int numRegions);
69  
70          /**
71           * In HBase, the first row is represented by an empty byte array. This might cause problems with your split
72           * algorithm or row printing. All your APIs will be passed firstRow() instead of empty array.
73           *
74           * @return your representation of your first row
75           */
76          byte[] firstRow();
77  
78          /**
79           * In HBase, the last row is represented by an empty byte array. This might cause problems with your split
80           * algorithm or row printing. All your APIs will be passed firstRow() instead of empty array.
81           *
82           * @return your representation of your last row
83           */
84          byte[] lastRow();
85  
86          /**
87           * In HBase, the last row is represented by an empty byte array. Set this value to help the split code
88           * understand how to evenly divide the first region.
89           *
90           * @param userInput
91           *            raw user input (may throw RuntimeException on parse failure)
92           */
93          void setFirstRow(String userInput);
94  
95          /**
96           * In HBase, the last row is represented by an empty byte array. Set this value to help the split code
97           * understand how to evenly divide the last region. Note that this last row is inclusive for all rows sharing
98           * the same prefix.
99           *
100          * @param userInput
101          *            raw user input (may throw RuntimeException on parse failure)
102          */
103         void setLastRow(String userInput);
104 
105         /**
106          * @param input
107          *            user or file input for row
108          * @return byte array representation of this row for HBase
109          */
110         byte[] strToRow(String input);
111 
112         /**
113          * @param row
114          *            byte array representing a row in HBase
115          * @return String to use for debug & file printing
116          */
117         String rowToStr(byte[] row);
118 
119         /**
120          * @return the separator character to use when storing / printing the row
121          */
122         String separator();
123 
124         /**
125          * Set the first row
126          *
127          * @param userInput
128          *            byte array of the row key.
129          */
130         void setFirstRow(byte[] userInput);
131 
132         /**
133          * Set the last row
134          *
135          * @param userInput
136          *            byte array of the row key.
137          */
138         void setLastRow(byte[] userInput);
139     }
140 
141     /**
142      * @throws IOException
143      *             if the specified SplitAlgorithm class couldn't be instantiated
144      */
145     public static SplitAlgorithm newSplitAlgoInstance(Configuration conf,
146                                                       String splitClassName) throws IOException {
147         Class<?> splitClass;
148 
149         // For split algorithms builtin to RegionSplitter, the user can specify
150         // their simple class name instead of a fully qualified class name.
151         if (splitClassName.equals(UniformSplit.class.getSimpleName())) {
152             splitClass = UniformSplit.class;
153         } else {
154             try {
155                 splitClass = conf.getClassByName(splitClassName);
156             } catch (ClassNotFoundException e) {
157                 throw new IOException("Couldn't load split class " + splitClassName, e);
158             }
159             if (splitClass == null) {
160                 throw new IOException("Failed loading split class " + splitClassName);
161             }
162             if (!SplitAlgorithm.class.isAssignableFrom(splitClass)) {
163                 throw new IOException(
164                         "Specified split class doesn't implement SplitAlgorithm");
165             }
166         }
167         try {
168             return splitClass.asSubclass(SplitAlgorithm.class).newInstance();
169         } catch (Exception e) {
170             throw new IOException("Problem loading split algorithm: ", e);
171         }
172     }
173 
174     /**
175      * A SplitAlgorithm that divides the space of possible keys evenly. Useful when the keys are approximately uniform
176      * random bytes (e.g. hashes). Rows are raw byte values in the range <b>00 => FF</b> and are right-padded with zeros
177      * to keep the same memcmp() order. This is the natural algorithm to use for a byte[] environment and saves space,
178      * but is not necessarily the easiest for readability.
179      */
180     public static class UniformSplit implements SplitAlgorithm {
181         static final byte xFF = (byte) 0xFF;
182         byte[] firstRowBytes = ArrayUtils.EMPTY_BYTE_ARRAY;
183         byte[] lastRowBytes =
184                 new byte[]{xFF, xFF, xFF, xFF, xFF, xFF, xFF, xFF};
185 
186         public byte[] split(byte[] start, byte[] end) {
187             return Bytes.split(start, end, 1)[1];
188         }
189 
190         @Override
191         public byte[][] split(int numRegions) {
192             Preconditions.checkArgument(
193                     Bytes.compareTo(lastRowBytes, firstRowBytes) > 0,
194                     "last row (%s) is configured less than first row (%s)",
195                     Bytes.toStringBinary(lastRowBytes),
196                     Bytes.toStringBinary(firstRowBytes));
197 
198             byte[][] splits = Bytes.split(firstRowBytes, lastRowBytes, true,
199                     numRegions - 1);
200             Preconditions.checkState(splits != null,
201                     "Could not split region with given user input: " + this);
202 
203             // remove endpoints, which are included in the splits list
204             return Arrays.copyOfRange(splits, 1, splits.length - 1);
205         }
206 
207         @Override
208         public byte[] firstRow() {
209             return firstRowBytes;
210         }
211 
212         @Override
213         public byte[] lastRow() {
214             return lastRowBytes;
215         }
216 
217         @Override
218         public void setFirstRow(String userInput) {
219             firstRowBytes = Bytes.toBytesBinary(userInput);
220         }
221 
222         @Override
223         public void setLastRow(String userInput) {
224             lastRowBytes = Bytes.toBytesBinary(userInput);
225         }
226 
227         @Override
228         public void setFirstRow(byte[] userInput) {
229             firstRowBytes = userInput;
230         }
231 
232         @Override
233         public void setLastRow(byte[] userInput) {
234             lastRowBytes = userInput;
235         }
236 
237         @Override
238         public byte[] strToRow(String input) {
239             return Bytes.toBytesBinary(input);
240         }
241 
242         @Override
243         public String rowToStr(byte[] row) {
244             return Bytes.toStringBinary(row);
245         }
246 
247         @Override
248         public String separator() {
249             return ",";
250         }
251 
252         @Override
253         public String toString() {
254             return this.getClass().getSimpleName() + " [" + rowToStr(firstRow())
255                     + "," + rowToStr(lastRow()) + "]";
256         }
257     }
258 }