View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.omid.committable.hbase;
19  
20  import org.apache.phoenix.thirdparty.com.google.common.base.Preconditions;
21  
22  import org.apache.commons.lang.ArrayUtils;
23  import org.apache.hadoop.conf.Configuration;
24  import org.apache.hadoop.hbase.util.Bytes;
25  
26  import java.io.IOException;
27  import java.util.Arrays;
28  
29  /**
30   * This class contains only the required behavior of the original
31   * org.apache.hadoop.hbase.util.RegionSplitter class to avoid
32   * having a reference to hbase-testing-util, which transitively
33   * imports hbase-server causing dependency conflicts for this module.
34   */
35  public class RegionSplitter {
36  
    /**
     * A generic interface for the RegionSplitter code to use for all its functionality. Note that the original
     * authors of this code used org.apache.hadoop.hbase.util.HexStringSplit to partition their table and set it as
     * default, but provided this interface for your custom algorithm. To use, create a new class implementing this
     * interface and pass its class name as the {@code splitClassName} argument of
     * {@link RegionSplitter#newSplitAlgoInstance(Configuration, String)}.
     */
    public interface SplitAlgorithm {

        /**
         * Split a pre-existing region into 2 regions.
         *
         * @param start
         *            first row (inclusive)
         * @param end
         *            last row (exclusive)
         * @return the split row to use
         */
        byte[] split(byte[] start, byte[] end);

        /**
         * Split an entire table.
         *
         * @param numRegions
         *            number of regions to split the table into
         *
         * @throws RuntimeException
         *             user input is validated at this time. May throw a runtime exception in response to a parse
         *             failure
         * @return array of split keys for the initial regions of the table. The length of the returned array should be
         *         numRegions-1.
         */
        byte[][] split(int numRegions);

        /**
         * In HBase, the first row is represented by an empty byte array. This might cause problems with your split
         * algorithm or row printing. All your APIs will be passed firstRow() instead of an empty array.
         *
         * @return your representation of your first row
         */
        byte[] firstRow();

        /**
         * In HBase, the last row is represented by an empty byte array. This might cause problems with your split
         * algorithm or row printing. All your APIs will be passed lastRow() instead of an empty array.
         *
         * @return your representation of your last row
         */
        byte[] lastRow();

        /**
         * In HBase, the first row is represented by an empty byte array. Set this value to help the split code
         * understand how to evenly divide the first region.
         *
         * @param userInput
         *            raw user input (may throw RuntimeException on parse failure)
         */
        void setFirstRow(String userInput);

        /**
         * In HBase, the last row is represented by an empty byte array. Set this value to help the split code
         * understand how to evenly divide the last region. Note that this last row is inclusive for all rows sharing
         * the same prefix.
         *
         * @param userInput raw user input (may throw RuntimeException on parse failure)
         */
        void setLastRow(String userInput);

        /**
         * Convert a textual row into its byte form.
         *
         * @param input
         *            user or file input for row
         * @return byte array representation of this row for HBase
         */
        byte[] strToRow(String input);

        /**
         * Convert a row into its textual form.
         *
         * @param row byte array representing a row in HBase
         * @return String to use for debug and file printing
         */
        String rowToStr(byte[] row);

        /**
         * @return the separator character to use when storing / printing the row
         */
        String separator();

        /**
         * Set the first row.
         *
         * @param userInput
         *            byte array of the row key.
         */
        void setFirstRow(byte[] userInput);

        /**
         * Set the last row.
         *
         * @param userInput
         *            byte array of the row key.
         */
        void setLastRow(byte[] userInput);
    }
140 
141     /**
142      * @param conf Hbase conf
143      * @param splitClassName split class name to be used
144      * @return an instance of SplitAlgorithm
145      * @throws IOException if the specified SplitAlgorithm class couldn't be instantiated
146      */
147     public static SplitAlgorithm newSplitAlgoInstance(Configuration conf,
148                                                       String splitClassName) throws IOException {
149         Class<?> splitClass;
150 
151         // For split algorithms builtin to RegionSplitter, the user can specify
152         // their simple class name instead of a fully qualified class name.
153         if (splitClassName.equals(UniformSplit.class.getSimpleName())) {
154             splitClass = UniformSplit.class;
155         } else {
156             try {
157                 splitClass = conf.getClassByName(splitClassName);
158             } catch (ClassNotFoundException e) {
159                 throw new IOException("Couldn't load split class " + splitClassName, e);
160             }
161             if (splitClass == null) {
162                 throw new IOException("Failed loading split class " + splitClassName);
163             }
164             if (!SplitAlgorithm.class.isAssignableFrom(splitClass)) {
165                 throw new IOException(
166                     "Specified split class doesn't implement SplitAlgorithm");
167             }
168         }
169         try {
170             return splitClass.asSubclass(SplitAlgorithm.class).newInstance();
171         } catch (Exception e) {
172             throw new IOException("Problem loading split algorithm: ", e);
173         }
174     }
175 
176     /**
177      * A SplitAlgorithm that divides the space of possible keys evenly. Useful when the keys are approximately uniform
178      * random bytes (e.g. hashes). Rows are raw byte values in the range [00..FF] and are right-padded with zeros
179      * to keep the same memcmp() order. This is the natural algorithm to use for a byte[] environment and saves space,
180      * but is not necessarily the easiest for readability.
181      */
182     public static class UniformSplit implements SplitAlgorithm {
183 
184         static final byte xFF = (byte) 0xFF;
185         byte[] firstRowBytes = ArrayUtils.EMPTY_BYTE_ARRAY;
186         byte[] lastRowBytes =
187             new byte[]{xFF, xFF, xFF, xFF, xFF, xFF, xFF, xFF};
188 
189         public byte[] split(byte[] start, byte[] end) {
190             return Bytes.split(start, end, 1)[1];
191         }
192 
193         @Override
194         public byte[][] split(int numRegions) {
195             Preconditions.checkArgument(
196                 Bytes.compareTo(lastRowBytes, firstRowBytes) > 0,
197                 "last row (%s) is configured less than first row (%s)",
198                 Bytes.toStringBinary(lastRowBytes),
199                 Bytes.toStringBinary(firstRowBytes));
200 
201             byte[][] splits = Bytes.split(firstRowBytes, lastRowBytes, true,
202                                           numRegions - 1);
203             Preconditions.checkState(splits != null,
204                                      "Could not split region with given user input: " + this);
205 
206             // remove endpoints, which are included in the splits list
207             return Arrays.copyOfRange(splits, 1, splits.length - 1);
208         }
209 
210         @Override
211         public byte[] firstRow() {
212             return firstRowBytes;
213         }
214 
215         @Override
216         public byte[] lastRow() {
217             return lastRowBytes;
218         }
219 
220         @Override
221         public void setFirstRow(String userInput) {
222             firstRowBytes = Bytes.toBytesBinary(userInput);
223         }
224 
225         @Override
226         public void setLastRow(String userInput) {
227             lastRowBytes = Bytes.toBytesBinary(userInput);
228         }
229 
230         @Override
231         public void setFirstRow(byte[] userInput) {
232             firstRowBytes = userInput;
233         }
234 
235         @Override
236         public void setLastRow(byte[] userInput) {
237             lastRowBytes = userInput;
238         }
239 
240         @Override
241         public byte[] strToRow(String input) {
242             return Bytes.toBytesBinary(input);
243         }
244 
245         @Override
246         public String rowToStr(byte[] row) {
247             return Bytes.toStringBinary(row);
248         }
249 
250         @Override
251         public String separator() {
252             return ",";
253         }
254 
255         @Override
256         public String toString() {
257             return this.getClass().getSimpleName() + " [" + rowToStr(firstRow())
258                    + "," + rowToStr(lastRow()) + "]";
259         }
260     }
261 }