diff options
author | RJ Nowling <rnowling@gmail.com> | 2015-08-26 08:53:19 -0500 |
---|---|---|
committer | RJ Nowling <rnowling@gmail.com> | 2015-08-26 08:53:19 -0500 |
commit | 74a66b1e8e0a3721f834bd9c36b826a68b494d34 (patch) | |
tree | 34efea732ef8b6103ccbc582229c1d9badcd1b97 | |
parent | caae595712381706a4ed4e938c393ede3b1311b2 (diff) |
BIGTOP-1986. Extract location dataset from BigPetStore data generator
-rw-r--r-- | bigtop-data-generators/bigtop-location-data/README.md | 50 | ||||
-rw-r--r-- | bigtop-data-generators/bigtop-location-data/build.gradle | 63 | ||||
-rw-r--r-- | bigtop-data-generators/bigtop-location-data/settings.gradle | 16 | ||||
-rw-r--r-- | bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/Location.java (renamed from bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/ZipcodeRecord.java) | 8 | ||||
-rw-r--r-- | bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationConstants.java | 25 | ||||
-rw-r--r-- | bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationReader.java (renamed from bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datareaders/ZipcodeReader.java) | 41 | ||||
-rwxr-xr-x | bigtop-data-generators/bigtop-location-data/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903.txt (renamed from bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903.txt) | 0 | ||||
-rwxr-xr-x | bigtop-data-generators/bigtop-location-data/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903_metadata.csv (renamed from bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903_metadata.csv) | 0 | ||||
-rwxr-xr-x | bigtop-data-generators/bigtop-location-data/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903_with_ann.csv (renamed from bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903_with_ann.csv) | 0 | ||||
-rw-r--r-- | bigtop-data-generators/bigtop-location-data/src/main/resources/input_data/population_data.csv (renamed from bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/population_data.csv) | 0 | ||||
-rw-r--r-- | bigtop-data-generators/bigtop-location-data/src/main/resources/input_data/zips.csv (renamed from bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/zips.csv) | 0 |
11 files changed, 173 insertions, 30 deletions
diff --git a/bigtop-data-generators/bigtop-location-data/README.md b/bigtop-data-generators/bigtop-location-data/README.md new file mode 100644 index 00000000..9aff06f9 --- /dev/null +++ b/bigtop-data-generators/bigtop-location-data/README.md @@ -0,0 +1,50 @@ +<!-- +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to You under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +--> +BigTop Location Data +==================== + +U.S. zipcode data including GPS coordinates, median household incomes, +and population sizes from the U.S. Census along with a reader and +data model. + +Building and Testing +-------------------- +We use the Gradle build system for the location data library so you'll need +to install Gradle on your system. +Once that's done, you can use gradle to run the included unit tests +and build the location data jar. + +To build: + + $ gradle build + +This will create several directories and a jar located at: + + build/libs/bigtop-location-data-1.1.0-SNAPSHOT.jar + +Building automatically runs the included unit tests. 
If you would prefer +to just run the unit tests, you can do so by: + + $ gradle test + +To clean up the build files, run: + + $ gradle clean + +To install a jar into your local maven repository: + + $ gradle install diff --git a/bigtop-data-generators/bigtop-location-data/build.gradle b/bigtop-data-generators/bigtop-location-data/build.gradle new file mode 100644 index 00000000..9eb91e39 --- /dev/null +++ b/bigtop-data-generators/bigtop-location-data/build.gradle @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +apply plugin: 'eclipse' +apply plugin: 'groovy' +apply plugin: 'java' +apply plugin: 'maven' +group = 'org.apache.bigtop' +version = '1.1.0-SNAPSHOT' + +jar { + + from { + configurations.runtime.collect { + it.isDirectory() ? 
it : zipTree(it) + } + } + + manifest { + attributes 'Title': 'BigTop Samplers', 'Version': version + } +} + +repositories { + mavenLocal() + mavenCentral() +} + +test { + // show standard out and error on console + testLogging.showStandardStreams = true + + // listen to events in the test execution lifecycle + beforeTest { descriptor -> + logger.lifecycle("Running test: " + descriptor) + } + + // listen to standard out and standard error of the test JVM(s) + onOutput { descriptor, event -> + logger.lifecycle("Test: " + descriptor + " produced standard out/err: " + event.message ) + } + +} + +dependencies { + compile 'com.google.guava:guava:18.0' + + compile 'org.apache.commons:commons-lang3:3.4' + + testCompile 'junit:junit:4.+' +} diff --git a/bigtop-data-generators/bigtop-location-data/settings.gradle b/bigtop-data-generators/bigtop-location-data/settings.gradle new file mode 100644 index 00000000..f66bfdbc --- /dev/null +++ b/bigtop-data-generators/bigtop-location-data/settings.gradle @@ -0,0 +1,16 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +rootProject.name = "bigtop-location-data"
\ No newline at end of file diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/ZipcodeRecord.java b/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/Location.java index e5eeb600..62afc0a8 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/ZipcodeRecord.java +++ b/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/Location.java @@ -13,13 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs; +package org.apache.bigtop.datagenerators.locations; import java.io.Serializable; import org.apache.commons.lang3.tuple.Pair; -public class ZipcodeRecord implements Serializable +public class Location implements Serializable { private static final long serialVersionUID = 1769986686070108470L; @@ -30,7 +30,7 @@ public class ZipcodeRecord implements Serializable final double medianHouseholdIncome; final long population; - public ZipcodeRecord(String zipcode, Pair<Double, Double> coordinates, + public Location(String zipcode, Pair<Double, Double> coordinates, String city, String state, double medianHouseholdIncome, long population) { this.city = city; @@ -61,7 +61,7 @@ public class ZipcodeRecord implements Serializable return population; } - public double distance(ZipcodeRecord other) + public double distance(Location other) { if(other.getZipcode().equals(zipcode)) return 0.0; diff --git a/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationConstants.java b/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationConstants.java new file mode 100644 index 
00000000..4140bfd4 --- /dev/null +++ b/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationConstants.java @@ -0,0 +1,25 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.datagenerators.locations; + +import java.io.File; + +public class LocationConstants +{ + public static final File COORDINATES_FILE = new File("zips.csv"); + public static final File INCOMES_FILE = new File("ACS_12_5YR_S1903/ACS_12_5YR_S1903_with_ann.csv"); + public static final File POPULATION_FILE = new File("population_data.csv"); +} diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datareaders/ZipcodeReader.java b/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationReader.java index 2478c8e1..5be4d59d 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datareaders/ZipcodeReader.java +++ b/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationReader.java @@ -13,8 +13,10 @@ * See the License for the specific language 
governing permissions and * limitations under the License. */ -package org.apache.bigtop.datagenerators.bigpetstore.datareaders; +package org.apache.bigtop.datagenerators.locations; +import java.io.BufferedInputStream; +import java.io.File; import java.io.FileNotFoundException; import java.io.InputStream; import java.util.HashSet; @@ -24,14 +26,13 @@ import java.util.Scanner; import java.util.Set; import java.util.Vector; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; import org.apache.commons.lang3.tuple.Pair; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Maps; -public class ZipcodeReader +public class LocationReader { private static class ZipcodeLocationRecord { @@ -47,24 +48,11 @@ public class ZipcodeReader this.state = state; } } - - InputStream zipcodeIncomesFile = null; - InputStream zipcodePopulationFile = null; - InputStream zipcodeCoordinatesFile = null; - - public void setIncomesFile(InputStream path) - { - this.zipcodeIncomesFile = path; - } - - public void setPopulationFile(InputStream path) - { - this.zipcodePopulationFile = path; - } - - public void setCoordinatesFile(InputStream path) + + private InputStream getResource(File filename) { - this.zipcodeCoordinatesFile = path; + InputStream stream = getClass().getResourceAsStream("/input_data/" + filename); + return new BufferedInputStream(stream); } private ImmutableMap<String, Double> readIncomeData(InputStream path) throws FileNotFoundException @@ -166,20 +154,21 @@ public class ZipcodeReader return ImmutableMap.copyOf(entries); } - public ImmutableList<ZipcodeRecord> readData() throws FileNotFoundException + public ImmutableList<Location> readData() throws FileNotFoundException { - ImmutableMap<String, Double> incomes = readIncomeData(this.zipcodeIncomesFile); - ImmutableMap<String, Long> populations = readPopulationData(this.zipcodePopulationFile); - ImmutableMap<String, 
ZipcodeLocationRecord> coordinates = readCoordinates(this.zipcodeCoordinatesFile); + + ImmutableMap<String, Double> incomes = readIncomeData(getResource(LocationConstants.INCOMES_FILE)); + ImmutableMap<String, Long> populations = readPopulationData(getResource(LocationConstants.POPULATION_FILE)); + ImmutableMap<String, ZipcodeLocationRecord> coordinates = readCoordinates(getResource(LocationConstants.COORDINATES_FILE)); Set<String> zipcodeSubset = new HashSet<String>(incomes.keySet()); zipcodeSubset.retainAll(populations.keySet()); zipcodeSubset.retainAll(coordinates.keySet()); - List<ZipcodeRecord> table = new Vector<ZipcodeRecord>(); + List<Location> table = new Vector<Location>(); for(String zipcode : zipcodeSubset) { - ZipcodeRecord record = new ZipcodeRecord(zipcode, + Location record = new Location(zipcode, coordinates.get(zipcode).coordinates, coordinates.get(zipcode).city, coordinates.get(zipcode).state, diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903.txt b/bigtop-data-generators/bigtop-location-data/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903.txt index 7127f905..7127f905 100755 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903.txt +++ b/bigtop-data-generators/bigtop-location-data/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903.txt diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903_metadata.csv b/bigtop-data-generators/bigtop-location-data/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903_metadata.csv index 9f703a36..9f703a36 100755 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903_metadata.csv +++ b/bigtop-data-generators/bigtop-location-data/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903_metadata.csv 
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903_with_ann.csv b/bigtop-data-generators/bigtop-location-data/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903_with_ann.csv index b5a5e027..b5a5e027 100755 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903_with_ann.csv +++ b/bigtop-data-generators/bigtop-location-data/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903_with_ann.csv diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/population_data.csv b/bigtop-data-generators/bigtop-location-data/src/main/resources/input_data/population_data.csv index 027c8396..027c8396 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/population_data.csv +++ b/bigtop-data-generators/bigtop-location-data/src/main/resources/input_data/population_data.csv diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/zips.csv b/bigtop-data-generators/bigtop-location-data/src/main/resources/input_data/zips.csv index 7e487a25..7e487a25 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/zips.csv +++ b/bigtop-data-generators/bigtop-location-data/src/main/resources/input_data/zips.csv |