1 /** 2 * Copyright (c) 2010 Yahoo! Inc. All rights reserved. 3 * 4 * http://www.apache.org/licenses/LICENSE-2.0 5 * 6 * Unless required by applicable law or agreed to in writing, software 7 * distributed under the License is distributed on an "AS IS" BASIS, 8 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 * See the License for the specific language governing permissions and 10 * limitations under the License. 11 */ 12 package org.apache.omid.benchmarks.utils; 13 14 15 /** 16 * A generator of a zipfian distribution. It produces a sequence of items, such that some items are more popular than others, according 17 * to a zipfian distribution. When you construct an instance of this class, you specify the number of items in the set to draw from, either 18 * by specifying an itemcount (so that the sequence is of items from 0 to itemcount-1) or by specifying a min and a max (so that the sequence is of 19 * items from min to max inclusive). After you construct the instance, you can change the number of items by calling nextInt(itemcount) or nextLong(itemcount). 20 * 21 * Unlike @ZipfianGenerator, this class scatters the "popular" items across the itemspace. Use this, instead of @ZipfianGenerator, if you 22 * don't want the head of the distribution (the popular items) clustered together. 23 */ 24 public class ScrambledZipfianGenerator extends IntegerGenerator { 25 public static final double ZETAN = 26.46902820178302; 26 public static final double USED_ZIPFIAN_CONSTANT = 0.99; 27 public static final long ITEM_COUNT = 10000000000L; 28 29 ZipfianGenerator gen; 30 long _min, _max, _itemcount; 31 32 /******************************* Constructors **************************************/ 33 34 /** 35 * Create a zipfian generator for the specified number of items. 36 * @param _items The number of items in the distribution. 37 */ 38 public ScrambledZipfianGenerator(long _items) { 39 this(0, _items - 1); 40 } 41 42 /** 43 * Create a zipfian generator for items between min and max. 44 * @param _min The smallest integer to generate in the sequence. 45 * @param _max The largest integer to generate in the sequence. 46 */ 47 public ScrambledZipfianGenerator(long _min, long _max) { 48 this(_min, _max, ZipfianGenerator.ZIPFIAN_CONSTANT); 49 } 50 51 /** 52 * Create a zipfian generator for the specified number of items using the specified zipfian constant. 53 * 54 * @param _items The number of items in the distribution. 55 * @param _zipfianconstant The zipfian constant to use. 56 */ 57 /* 58 // not supported, as the value of zeta depends on the zipfian constant, and we have only precomputed zeta for one zipfian constant 59 public ScrambledZipfianGenerator(long _items, double _zipfianconstant) 60 { 61 this(0,_items-1,_zipfianconstant); 62 } 63 */ 64 65 /** 66 * Create a zipfian generator for items between min and max (inclusive) for the specified zipfian constant. If you 67 * use a zipfian constant other than 0.99, this will take a long time to complete because we need to recompute zeta. 68 * @param min The smallest integer to generate in the sequence. 69 * @param max The largest integer to generate in the sequence. 70 * @param _zipfianconstant The zipfian constant to use. 71 */ 72 public ScrambledZipfianGenerator(long min, long max, double _zipfianconstant) { 73 _min = min; 74 _max = max; 75 _itemcount = _max - _min + 1; 76 if (_zipfianconstant == USED_ZIPFIAN_CONSTANT) { 77 gen = new ZipfianGenerator(0, ITEM_COUNT, _zipfianconstant, ZETAN); 78 } else { 79 gen = new ZipfianGenerator(0, ITEM_COUNT, _zipfianconstant); 80 } 81 } 82 83 /**************************************************************************************************/ 84 85 /** 86 * Return the next int in the sequence. 87 */ 88 @Override 89 public int nextInt() { 90 return (int) nextLong(); 91 } 92 93 /** 94 * @return the next long in the sequence. 95 */ 96 public long nextLong() { 97 long ret = gen.nextLong(); 98 ret = _min + FNVhash64(ret) % _itemcount; 99 setLastInt((int) ret); 100 return ret; 101 } 102 103 public static void main(String[] args) { 104 double newzetan = ZipfianGenerator.zetastatic(ITEM_COUNT, ZipfianGenerator.ZIPFIAN_CONSTANT); 105 System.out.println("zetan: " + newzetan); 106 System.exit(0); 107 108 ScrambledZipfianGenerator gen = new ScrambledZipfianGenerator(10000); 109 110 for (int i = 0; i < 1000000; i++) { 111 System.out.println("" + gen.nextInt()); 112 } 113 } 114 115 /** 116 * since the values are scrambled (hopefully uniformly), the mean is simply the middle of the range. 117 */ 118 @Override 119 public double mean() { 120 return ((double) (_min + _max)) / 2.0; 121 } 122 123 /** 124 * 64 bit FNV hash. Produces more "random" hashes than (say) String.hashCode(). 125 * 126 * @param val The value to hash. 127 * @return The hash value 128 */ 129 public static long FNVhash64(long val) { 130 //from http://en.wikipedia.org/wiki/Fowler_Noll_Vo_hash 131 long hashval = FNV_offset_basis_64; 132 133 for (int i = 0; i < 8; i++) { 134 long octet = val & 0x00ff; 135 val = val >> 8; 136 137 hashval = hashval ^ octet; 138 hashval = hashval * FNV_prime_64; 139 //hashval = hashval ^ octet; 140 } 141 return Math.abs(hashval); 142 } 143 144 public static final long FNV_offset_basis_64 = 0xCBF29CE484222325L; 145 public static final long FNV_prime_64 = 1099511628211L; 146 147 }