Ingredient: Using anonymizers
This example shows a shelf with anonymizers. Anonymizers make a dataset look like it has random data by modifying the ingredient value.
[7]:
# Star import: Shelf, Dimension, Metric, func, Census, oven, Recipe and
# Anonymize are not defined in this cell, so they presumably all come from
# examples_base.
from examples_base import *
# Build a shelf whose ingredients each carry an anonymizer callable.
# The state anonymizer reverses the state name (v[::-1]).
# The population anonymizer adds 1000 to the summed population.
shelf = Shelf({
'state': Dimension(Census.state, anonymizer=lambda v: v[::-1]),
'population': Metric(func.sum(Census.pop2000), anonymizer=lambda v: v+1000)
})
# To use the anonymizers, the Anonymize extension class must be passed in
# extension_classes and the anonymize flag must be set to True.
recipe = Recipe(shelf=shelf, session=oven.Session(), extension_classes=[Anonymize])\
.dimensions('state').metrics('population').anonymize(True)
# Look at the output: the printed SQL selects only the raw columns
# (state_raw, population_raw); the anonymized values show up in the
# dataframe displayed below.
print(recipe.to_sql())
# Last expression in the cell, so the notebook displays the dataframe.
recipe.dataset.df
SELECT census.state AS state_raw,
sum(census.pop2000) AS population_raw
FROM census
GROUP BY census.state
[7]:
| | state_raw | population_raw | state | state_id | population |
|---|---|---|---|---|---|
| 0 | Alabama | 4438559 | amabalA | Alabama | 4439559 |
| 1 | Alaska | 608588 | aksalA | Alaska | 609588 |
| 2 | Arizona | 5143931 | anozirA | Arizona | 5144931 |
| 3 | Arkansas | 2672434 | sasnakrA | Arkansas | 2673434 |
| 4 | California | 33829442 | ainrofilaC | California | 33830442 |
| 5 | Colorado | 4300877 | odaroloC | Colorado | 4301877 |
| 6 | Connecticut | 3403620 | tucitcennoC | Connecticut | 3404620 |
| 7 | Delaware | 782386 | erawaleD | Delaware | 783386 |
| 8 | District of Columbia | 568103 | aibmuloC fo tcirtsiD | District of Columbia | 569103 |
| 9 | Florida | 15976093 | adirolF | Florida | 15977093 |
| 10 | Georgia | 8161776 | aigroeG | Georgia | 8162776 |
| 11 | Hawaii | 1167027 | iiawaH | Hawaii | 1168027 |
| 12 | Idaho | 1294560 | ohadI | Idaho | 1295560 |
| 13 | Illinois | 12405300 | sionillI | Illinois | 12406300 |
| 14 | Indiana | 6089161 | anaidnI | Indiana | 6090161 |
| 15 | Iowa | 2926878 | awoI | Iowa | 2927878 |
| 16 | Kansas | 2675929 | sasnaK | Kansas | 2676929 |
| 17 | Kentucky | 4028389 | ykcutneK | Kentucky | 4029389 |
| 18 | Louisiana | 4449489 | anaisiuoL | Louisiana | 4450489 |
| 19 | Maine | 1271694 | eniaM | Maine | 1272694 |
| 20 | Maryland | 5274039 | dnalyraM | Maryland | 5275039 |
| 21 | Massachusetts | 6357515 | sttesuhcassaM | Massachusetts | 6358515 |
| 22 | Michigan | 9951873 | nagihciM | Michigan | 9952873 |
| 23 | Minnesota | 4931897 | atosenniM | Minnesota | 4932897 |
| 24 | Mississippi | 2830133 | ippississiM | Mississippi | 2831133 |
| 25 | Missouri | 5588759 | iruossiM | Missouri | 5589759 |
| 26 | Montana | 899459 | anatnoM | Montana | 900459 |
| 27 | Nebraska | 1705040 | aksarbeN | Nebraska | 1706040 |
| 28 | Nevada | 2010272 | adaveN | Nevada | 2011272 |
| 29 | New Hampshire | 1239307 | erihspmaH weN | New Hampshire | 1240307 |
| 30 | New Jersey | 8420023 | yesreJ weN | New Jersey | 8421023 |
| 31 | New Mexico | 1809015 | ocixeM weN | New Mexico | 1810015 |
| 32 | New York | 18978668 | kroY weN | New York | 18979668 |
| 33 | North Carolina | 7978581 | aniloraC htroN | North Carolina | 7979581 |
| 34 | North Dakota | 633621 | atokaD htroN | North Dakota | 634621 |
| 35 | Ohio | 11355210 | oihO | Ohio | 11356210 |
| 36 | Oklahoma | 3430420 | amohalkO | Oklahoma | 3431420 |
| 37 | Oregon | 3428319 | nogerO | Oregon | 3429319 |
| 38 | Pennsylvania | 12276157 | ainavlysnneP | Pennsylvania | 12277157 |
| 39 | Rhode Island | 1047200 | dnalsI edohR | Rhode Island | 1048200 |
| 40 | South Carolina | 3983917 | aniloraC htuoS | South Carolina | 3984917 |
| 41 | South Dakota | 752231 | atokaD htuoS | South Dakota | 753231 |
| 42 | Tennessee | 5685230 | eessenneT | Tennessee | 5686230 |
| 43 | Texas | 20830810 | saxeT | Texas | 20831810 |
| 44 | Utah | 2238675 | hatU | Utah | 2239675 |
| 45 | Vermont | 609480 | tnomreV | Vermont | 610480 |
| 46 | Virginia | 6955790 | ainigriV | Virginia | 6956790 |
| 47 | Washington | 5863102 | notgnihsaW | Washington | 5864102 |
| 48 | West Virginia | 1805847 | ainigriV tseW | West Virginia | 1806847 |
| 49 | Wisconsin | 5372159 | nisnocsiW | Wisconsin | 5373159 |
| 50 | Wyoming | 490336 | gnimoyW | Wyoming | 491336 |
[9]:
# Star import: Shelf, Dimension, Metric, func, Census, oven, Recipe and
# Anonymize are not defined in this cell, so they presumably all come from
# examples_base.
from examples_base import *
# The state anonymizer is a format string rather than a callable:
# '{fake:name}' replaces each state name with a generated personal name
# (see the output table below). NOTE(review): presumably backed by a
# fake-data provider; confirm the supported string syntax in the library docs.
# The population anonymizer adds 1000 to the summed population.
shelf = Shelf({
'state': Dimension(Census.state, anonymizer='{fake:name}'),
'population': Metric(func.sum(Census.pop2000), anonymizer=lambda v: v+1000)
})
# To use the anonymizers, the Anonymize extension class must be passed in
# extension_classes and the anonymize flag must be set to True.
recipe = Recipe(shelf=shelf, session=oven.Session(), extension_classes=[Anonymize])\
.dimensions('state').metrics('population').anonymize(True)
# Look at the output: the printed SQL selects only the raw columns
# (state_raw, population_raw); the anonymized values show up in the
# dataframe displayed below.
print(recipe.to_sql())
# Last expression in the cell, so the notebook displays the dataframe.
recipe.dataset.df
SELECT census.state AS state_raw,
sum(census.pop2000) AS population_raw
FROM census
GROUP BY census.state
[9]:
| | state_raw | population_raw | state | state_id | population |
|---|---|---|---|---|---|
| 0 | Alabama | 4438559 | Steven Williams | Alabama | 4439559 |
| 1 | Alaska | 608588 | Donald Calderon | Alaska | 609588 |
| 2 | Arizona | 5143931 | Shannon Bean | Arizona | 5144931 |
| 3 | Arkansas | 2672434 | Mr. Kyle Hurst | Arkansas | 2673434 |
| 4 | California | 33829442 | Stephanie Mitchell | California | 33830442 |
| 5 | Colorado | 4300877 | Alex Graham | Colorado | 4301877 |
| 6 | Connecticut | 3403620 | John Newton | Connecticut | 3404620 |
| 7 | Delaware | 782386 | Samantha Norman | Delaware | 783386 |
| 8 | District of Columbia | 568103 | Justin Taylor | District of Columbia | 569103 |
| 9 | Florida | 15976093 | Tanya Kelley | Florida | 15977093 |
| 10 | Georgia | 8161776 | Jacob Koch | Georgia | 8162776 |
| 11 | Hawaii | 1167027 | Natalie Walsh | Hawaii | 1168027 |
| 12 | Idaho | 1294560 | Michael Austin | Idaho | 1295560 |
| 13 | Illinois | 12405300 | Mr. Paul Olson | Illinois | 12406300 |
| 14 | Indiana | 6089161 | Natalie Mcfarland | Indiana | 6090161 |
| 15 | Iowa | 2926878 | Laurie Smith | Iowa | 2927878 |
| 16 | Kansas | 2675929 | Robert Baker | Kansas | 2676929 |
| 17 | Kentucky | 4028389 | Carol Wright | Kentucky | 4029389 |
| 18 | Louisiana | 4449489 | Michael Harrison | Louisiana | 4450489 |
| 19 | Maine | 1271694 | Cassandra Berry | Maine | 1272694 |
| 20 | Maryland | 5274039 | Matthew Warren | Maryland | 5275039 |
| 21 | Massachusetts | 6357515 | Michael Bryant | Massachusetts | 6358515 |
| 22 | Michigan | 9951873 | Michelle Nelson | Michigan | 9952873 |
| 23 | Minnesota | 4931897 | Rebekah Berg | Minnesota | 4932897 |
| 24 | Mississippi | 2830133 | Elaine Wood | Mississippi | 2831133 |
| 25 | Missouri | 5588759 | Kevin Johnson | Missouri | 5589759 |
| 26 | Montana | 899459 | Lindsey Adams | Montana | 900459 |
| 27 | Nebraska | 1705040 | Linda Wade | Nebraska | 1706040 |
| 28 | Nevada | 2010272 | Mark Hayes | Nevada | 2011272 |
| 29 | New Hampshire | 1239307 | Matthew Anderson | New Hampshire | 1240307 |
| 30 | New Jersey | 8420023 | Kathleen Little | New Jersey | 8421023 |
| 31 | New Mexico | 1809015 | Edward Andrews | New Mexico | 1810015 |
| 32 | New York | 18978668 | Mary James | New York | 18979668 |
| 33 | North Carolina | 7978581 | Matthew Myers | North Carolina | 7979581 |
| 34 | North Dakota | 633621 | Luke Huynh | North Dakota | 634621 |
| 35 | Ohio | 11355210 | Jessica Guerrero | Ohio | 11356210 |
| 36 | Oklahoma | 3430420 | Valerie Zimmerman | Oklahoma | 3431420 |
| 37 | Oregon | 3428319 | Jennifer Stephenson | Oregon | 3429319 |
| 38 | Pennsylvania | 12276157 | Timothy Johnson | Pennsylvania | 12277157 |
| 39 | Rhode Island | 1047200 | Michael Garcia | Rhode Island | 1048200 |
| 40 | South Carolina | 3983917 | Timothy Kramer | South Carolina | 3984917 |
| 41 | South Dakota | 752231 | Christopher Henson | South Dakota | 753231 |
| 42 | Tennessee | 5685230 | Michael Rodriguez PhD | Tennessee | 5686230 |
| 43 | Texas | 20830810 | Eric Cross | Texas | 20831810 |
| 44 | Utah | 2238675 | Rachael Pratt | Utah | 2239675 |
| 45 | Vermont | 609480 | Michelle Schultz | Vermont | 610480 |
| 46 | Virginia | 6955790 | Laura Summers | Virginia | 6956790 |
| 47 | Washington | 5863102 | Shannon Young | Washington | 5864102 |
| 48 | West Virginia | 1805847 | Connie Mitchell | West Virginia | 1806847 |
| 49 | Wisconsin | 5372159 | Evan Lee | Wisconsin | 5373159 |
| 50 | Wyoming | 490336 | Marcus Williams | Wyoming | 491336 |
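Using an anonymizer injects the anonymize function as the last formatter. The original value is still available under the ingredient's name with a `_raw` suffix (for example, `state_raw` and `population_raw` in the tables above).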