5 Scripts
By publishing our R and Python scripts, we aim for transparency and reproducibility.
Our scripts can be used:
- As an introduction to how to use the data.
- To reproduce our analysis.
- To scrutinize how we work with and analyse the data.
Any comments or questions are welcome.
DISCLAIMER: OsloMet, ideas2evidence, Frischsenteret and the Ministry of Justice and Public Security do not take any responsibility for the use and interpretation of data from the National Crime Survey. The above mentioned further take no responsibility for any negative consequences that may arise as a result of the use of this data.
5.1 trygghetsundersokelsen.no
These scripts reproduce the statistics published at trygghetsundersokelsen.no
Content:
Ungrouped frequencies are calculated for all variables, but not every variable is broken down by every demographic variable. For each variable, the script exemplifies with a few demographic variables, and every demographic variable is used at some point. The demographic variables are as follows:
- Age
- Gender
- Education
- Immigration
- Married
- Children
- County
Weighting:
All frequencies are weighted using weight_edu.
The weight is based on demographic variables
(age, gender and geography) and education level.
5.1.1 R
library(tidyverse)
library(haven)
# data import
# Reads the SPSS file from the current working directory (relative path).
df <- read_sav("Norwegian Crime Survey NSD v1.sav")
# ---------------------------------------------------- #
# General functions for recoding demographic variables #
# ---------------------------------------------------- #
# Recode {{immigration}}
#
# Collapses the numeric codes in `x` into a three-level factor:
#   1       -> "No, have not immigrated to Norway"
#   2       -> "Have immigrated to Norway"
#   3, 4, 5 -> "Born in Norway, but both or one of the parents immigrated"
# Every other value (including NA) becomes NA.
#
# x: vector of numeric codes. Returns a factor of the same length as `x`.
recode_immigration <- function(x) {
  codes <- case_when(
    x == 1 ~ 1,
    x == 2 ~ 2,
    x %in% c(3, 4, 5) ~ 3,
    TRUE ~ NA_real_
  )
  # Explicit levels tie each label to its code. Without them factor() takes
  # the levels from the values present in `codes`, and the three labels stop
  # lining up (factor() errors) as soon as one category is absent.
  factor(
    codes,
    levels = c(1, 2, 3),
    labels = c(
      "No, have not immigrated to Norway",
      "Have immigrated to Norway",
      "Born in Norway, but both or one of the parents immigrated"
    )
  )
}
# Recode {{gift}}
#
# Collapses the numeric codes in `x` into a two-level factor:
#   1, 2 -> "Married or cohabitant"
#   3    -> "Living alone"
# Every other value (including NA) becomes NA.
#
# x: vector of numeric codes. Returns a factor of the same length as `x`.
recode_married <- function(x) {
  codes <- case_when(
    x %in% c(1, 2) ~ 1,
    x == 3 ~ 2,
    TRUE ~ NA_real_
  )
  # Explicit levels keep the labels attached to the right codes even when
  # one of the two categories does not occur in `x`.
  factor(
    codes,
    levels = c(1, 2),
    labels = c("Married or cohabitant", "Living alone")
  )
}
# ------------------------------------------------------------------------------------------------ #
# urtrygg1: Percentage that feels unsafe if they go out alone at night in the area where they live #
# ------------------------------------------------------------------------------------------------ #
# Each table sums weight_edu per response code, keeps codes below 5 and then
# computes pct. The filter runs first, so dropped codes are not part of the
# denominator; in grouped tables pct is computed within each group.

# all
df |>
  count(urtrygg1, wt = weight_edu) |>
  filter(urtrygg1 < 5) |>
  mutate(pct = n / sum(n))

# education
df |>
  group_by(utdanning_kort) |>
  count(urtrygg1, wt = weight_edu) |>
  filter(urtrygg1 < 5 & utdanning_kort < 97) |>
  mutate(pct = n / sum(n))

# age
df |>
  group_by(aldersgruppe_vuttrekk_dsf) |>
  count(urtrygg1, wt = weight_edu) |>
  filter(urtrygg1 < 5) |>
  mutate(pct = n / sum(n))

# map/county
df |>
  group_by(fylkenr_dsf) |>
  count(urtrygg1, wt = weight_edu) |>
  filter(urtrygg1 < 5) |>
  mutate(pct = n / sum(n))
# ------------------------------------------------------------------- #
# urtrygg2: Percentage that always plan ahead to avoid crime exposure #
# ------------------------------------------------------------------- #
# Each table sums weight_edu per response code, filters, then computes pct
# on the remaining rows (within each group for the grouped tables).

# all
df |>
  count(urtrygg2, wt = weight_edu) |>
  filter(urtrygg2 < 97) |>
  mutate(pct = n / sum(n))

# Children
# NOTE(review): this table keeps urtrygg2 < 5 while the other urtrygg2
# tables keep urtrygg2 < 97 -- confirm the two cut-offs are equivalent.
df |>
  group_by(barn) |>
  count(urtrygg2, wt = weight_edu) |>
  filter(urtrygg2 < 5 & barn < 97) |>
  mutate(pct = n / sum(n))

# gender
df |>
  group_by(kjonn_dsf) |>
  count(urtrygg2, wt = weight_edu) |>
  filter(urtrygg2 < 97) |>
  mutate(pct = n / sum(n))

# map/county
df |>
  group_by(fylkenr_dsf) |>
  count(urtrygg2, wt = weight_edu) |>
  filter(urtrygg2 < 97) |>
  mutate(pct = n / sum(n))
# ----------------------------------------------------------------- #
# urtrygg3: Percentage that is worried about being exposed to crime #
# ----------------------------------------------------------------- #
# Each table sums weight_edu per response code, keeps codes below 5, then
# computes pct on the remaining rows (within each group where grouped).

# all
df |>
  count(urtrygg3, wt = weight_edu) |>
  filter(urtrygg3 < 5) |>
  mutate(pct = n / sum(n))

# immigration
df |>
  mutate(innvandret = recode_immigration(innvandret)) |>
  group_by(innvandret) |>
  count(urtrygg3, wt = weight_edu) |>
  filter(urtrygg3 < 5 & !is.na(innvandret)) |>
  mutate(pct = n / sum(n))

# married
df |>
  mutate(gift = recode_married(gift)) |>
  group_by(gift) |>
  count(urtrygg3, wt = weight_edu) |>
  filter(urtrygg3 < 5 & !is.na(gift)) |>
  mutate(pct = n / sum(n))
# ------------------------------------------------------------ #
# urkrim_1: Percentage that worries about exposure to burglary #
# ------------------------------------------------------------ #
# Each table sums weight_edu per response code, keeps codes below 6, then
# computes pct on the remaining rows (within each group where grouped).

# all
df |>
  count(urkrim_1, wt = weight_edu) |>
  filter(urkrim_1 < 6) |>
  mutate(pct = n / sum(n))

# children
df |>
  group_by(barn) |>
  count(urkrim_1, wt = weight_edu) |>
  filter(urkrim_1 < 6 & barn < 97) |>
  mutate(pct = n / sum(n))

# county
df |>
  group_by(fylkenr_dsf) |>
  count(urkrim_1, wt = weight_edu) |>
  filter(urkrim_1 < 6) |>
  mutate(pct = n / sum(n))
# --------------------------------------------------------- #
# urkrim_2: Percentage that worries about exposure to theft #
# --------------------------------------------------------- #
# Each table sums weight_edu per response code, keeps codes below 6, then
# computes pct on the remaining rows (within each group where grouped).

# all
df |>
  count(urkrim_2, wt = weight_edu) |>
  filter(urkrim_2 < 6) |>
  mutate(pct = n / sum(n))

# education
df |>
  group_by(utdanning_kort) |>
  count(urkrim_2, wt = weight_edu) |>
  filter(urkrim_2 < 6 & utdanning_kort < 97) |>
  mutate(pct = n / sum(n))

# gender
df |>
  group_by(kjonn_dsf) |>
  count(urkrim_2, wt = weight_edu) |>
  filter(urkrim_2 < 6) |>
  mutate(pct = n / sum(n))
# ------------------------------------------------------------------------------------------------- #
# urkrim_5: Percentage that worries about exposure for robbery with violence or threats of violence #
# ------------------------------------------------------------------------------------------------- #
# Each table sums weight_edu per response code, keeps codes below 6, then
# computes pct on the remaining rows (within each group where grouped).

# all
df |>
  count(urkrim_5, wt = weight_edu) |>
  filter(urkrim_5 < 6) |>
  mutate(pct = n / sum(n))

# immigration
df |>
  mutate(innvandret = recode_immigration(innvandret)) |>
  group_by(innvandret) |>
  count(urkrim_5, wt = weight_edu) |>
  filter(urkrim_5 < 6 & !is.na(innvandret)) |>
  mutate(pct = n / sum(n))

# age
df |>
  group_by(aldersgruppe_vuttrekk_dsf) |>
  count(urkrim_5, wt = weight_edu) |>
  filter(urkrim_5 < 6) |>
  mutate(pct = n / sum(n))
# --------------------------------------------------------------------- #
# urkrim_7: Percentage that worries about exposure to physical violence #
# --------------------------------------------------------------------- #
# Each table sums weight_edu per response code, keeps codes below 6, then
# computes pct on the remaining rows (within each group where grouped).

# all
df |>
  count(urkrim_7, wt = weight_edu) |>
  filter(urkrim_7 < 6) |>
  mutate(pct = n / sum(n))

# married
df |>
  mutate(gift = recode_married(gift)) |>
  group_by(gift) |>
  count(urkrim_7, wt = weight_edu) |>
  filter(urkrim_7 < 6 & !is.na(gift)) |>
  mutate(pct = n / sum(n))

# children
df |>
  group_by(barn) |>
  count(urkrim_7, wt = weight_edu) |>
  filter(urkrim_7 < 6 & barn < 97) |>
  mutate(pct = n / sum(n))
# -------------------------------------------------------------- #
# urkrim_8: Percentage that worries about exposure to hate crime #
# -------------------------------------------------------------- #
# Each table sums weight_edu per response code, keeps codes below 6, then
# computes pct on the remaining rows (within each group where grouped).

# all
df |>
  count(urkrim_8, wt = weight_edu) |>
  filter(urkrim_8 < 6) |>
  mutate(pct = n / sum(n))

# age
df |>
  group_by(aldersgruppe_vuttrekk_dsf) |>
  count(urkrim_8, wt = weight_edu) |>
  filter(urkrim_8 < 6) |>
  mutate(pct = n / sum(n))

# children
df |>
  group_by(barn) |>
  count(urkrim_8, wt = weight_edu) |>
  filter(urkrim_8 < 6 & barn < 97) |>
  mutate(pct = n / sum(n))
# ------------------------------------------------------------------------- #
# urkrim_9: Percentage that worries about exposure to sexual abuse or abuse #
# ------------------------------------------------------------------------- #
# Each table sums weight_edu per response code, keeps codes below 6, then
# computes pct on the remaining rows (within each group where grouped).

# all
df |>
  count(urkrim_9, wt = weight_edu) |>
  filter(urkrim_9 < 6) |>
  mutate(pct = n / sum(n))

# immigration
df |>
  mutate(innvandret = recode_immigration(innvandret)) |>
  group_by(innvandret) |>
  count(urkrim_9, wt = weight_edu) |>
  filter(urkrim_9 < 6 & !is.na(innvandret)) |>
  mutate(pct = n / sum(n))

# education
df |>
  group_by(utdanning_kort) |>
  count(urkrim_9, wt = weight_edu) |>
  filter(urkrim_9 < 6 & utdanning_kort < 97) |>
  mutate(pct = n / sum(n))
# ----------------------------------------------------------------------------------------------------------- #
# urkrim_10: Percentage that worries about dissemination of photos, videos and information against their will #
# ----------------------------------------------------------------------------------------------------------- #
# Each table sums weight_edu per response code, keeps codes below 6, then
# computes pct on the remaining rows (within each group where grouped).

# all
df |>
  count(urkrim_10, wt = weight_edu) |>
  filter(urkrim_10 < 6) |>
  mutate(pct = n / sum(n))

# married
df |>
  mutate(gift = recode_married(gift)) |>
  group_by(gift) |>
  count(urkrim_10, wt = weight_edu) |>
  filter(urkrim_10 < 6 & !is.na(gift)) |>
  mutate(pct = n / sum(n))

# immigration
df |>
  mutate(innvandret = recode_immigration(innvandret)) |>
  group_by(innvandret) |>
  count(urkrim_10, wt = weight_edu) |>
  filter(urkrim_10 < 6 & !is.na(innvandret)) |>
  mutate(pct = n / sum(n))
# ----------------------------------------------------------------------------------------------- #
# utsibilde1: Percentage that is exposed to sharing/spread of images or videos against their will #
# ----------------------------------------------------------------------------------------------- #
# Each table sums weight_edu per response code, keeps codes below 97, then
# computes pct on the remaining rows (within each group where grouped).

# all
df |>
  count(utsibilde1, wt = weight_edu) |>
  filter(utsibilde1 < 97) |>
  mutate(pct = n / sum(n))

# education
df |>
  group_by(utdanning_kort) |>
  count(utsibilde1, wt = weight_edu) |>
  filter(utsibilde1 < 97 & utdanning_kort < 97) |>
  mutate(pct = n / sum(n))

# children
df |>
  group_by(barn) |>
  count(utsibilde1, wt = weight_edu) |>
  filter(utsibilde1 < 97 & barn < 97) |>
  mutate(pct = n / sum(n))
# ---------------------------------------------------------------------------------------- #
# utsiident1: Percentage that is exposed to misuse of personal information on the internet #
# ---------------------------------------------------------------------------------------- #
# Each table sums weight_edu per response code, keeps codes below 97, then
# computes pct on the remaining rows (within each group where grouped).

# all
df |>
  count(utsiident1, wt = weight_edu) |>
  filter(utsiident1 < 97) |>
  mutate(pct = n / sum(n))

# gender
df |>
  group_by(kjonn_dsf) |>
  count(utsiident1, wt = weight_edu) |>
  filter(utsiident1 < 97) |>
  mutate(pct = n / sum(n))

# age
df |>
  group_by(aldersgruppe_vuttrekk_dsf) |>
  count(utsiident1, wt = weight_edu) |>
  filter(utsiident1 < 97) |>
  mutate(pct = n / sum(n))
# ----------------------------------------------------------- #
# uttbol1: Percentage that is exposed to residential burglary #
# ----------------------------------------------------------- #
# Each table sums weight_edu per response code, keeps codes below 97, then
# computes pct on the remaining rows (within each group where grouped).

# all
df |>
  count(uttbol1, wt = weight_edu) |>
  filter(uttbol1 < 97) |>
  mutate(pct = n / sum(n))

# gender
df |>
  group_by(kjonn_dsf) |>
  count(uttbol1, wt = weight_edu) |>
  filter(uttbol1 < 97) |>
  mutate(pct = n / sum(n))

# education
df |>
  group_by(utdanning_kort) |>
  count(uttbol1, wt = weight_edu) |>
  filter(uttbol1 < 97 & utdanning_kort < 97) |>
  mutate(pct = n / sum(n))
# ------------------------------------------------------------------ #
# uttlom1: Percentage that is exposed to theft of money or valuables #
# ------------------------------------------------------------------ #
# Each table sums weight_edu per response code, keeps codes below 97, then
# computes pct on the remaining rows (within each group where grouped).

# all
df |>
  count(uttlom1, wt = weight_edu) |>
  filter(uttlom1 < 97) |>
  mutate(pct = n / sum(n))

# children
df |>
  group_by(barn) |>
  count(uttlom1, wt = weight_edu) |>
  filter(uttlom1 < 97 & barn < 97) |>
  mutate(pct = n / sum(n))

# gender
df |>
  group_by(kjonn_dsf) |>
  count(uttlom1, wt = weight_edu) |>
  filter(uttlom1 < 97) |>
  mutate(pct = n / sum(n))
# ---------------------------------------------------------------------------------------- #
# uttran1: Percentage that is exposed to robbery with threats of violence or with violence #
# ---------------------------------------------------------------------------------------- #
# Each table sums weight_edu per response code, keeps codes below 97, then
# computes pct on the remaining rows (within each group where grouped).

# all
df |>
  count(uttran1, wt = weight_edu) |>
  filter(uttran1 < 97) |>
  mutate(pct = n / sum(n))

# education
df |>
  group_by(utdanning_kort) |>
  count(uttran1, wt = weight_edu) |>
  filter(uttran1 < 97 & utdanning_kort < 97) |>
  mutate(pct = n / sum(n))

# immigration
df |>
  mutate(innvandret = recode_immigration(innvandret)) |>
  group_by(innvandret) |>
  count(uttran1, wt = weight_edu) |>
  filter(uttran1 < 97 & !is.na(innvandret)) |>
  mutate(pct = n / sum(n))
# ----------------------------------------------- #
# uttruss1: Percentage that is exposed to threats #
# ----------------------------------------------- #
# Each table sums weight_edu per response code, keeps codes below 97, then
# computes pct on the remaining rows (within each group where grouped).

# all
df |>
  count(uttruss1, wt = weight_edu) |>
  filter(uttruss1 < 97) |>
  mutate(pct = n / sum(n))

# married
df |>
  mutate(gift = recode_married(gift)) |>
  group_by(gift) |>
  count(uttruss1, wt = weight_edu) |>
  filter(uttruss1 < 97 & !is.na(gift)) |>
  mutate(pct = n / sum(n))

# gender
df |>
  group_by(kjonn_dsf) |>
  count(uttruss1, wt = weight_edu) |>
  filter(uttruss1 < 97) |>
  mutate(pct = n / sum(n))
# ------------------------------------------------- #
# uttsyk1: Percentage that is exposed to bike theft #
# ------------------------------------------------- #
# Each table sums weight_edu per response code, keeps codes below 3, then
# computes pct on the remaining rows (within each group where grouped).

# all
df |>
  count(uttsyk1, wt = weight_edu) |>
  filter(uttsyk1 < 3) |>
  mutate(pct = n / sum(n))

# age
df |>
  group_by(aldersgruppe_vuttrekk_dsf) |>
  count(uttsyk1, wt = weight_edu) |>
  filter(uttsyk1 < 3) |>
  mutate(pct = n / sum(n))

# education
df |>
  group_by(utdanning_kort) |>
  count(uttsyk1, wt = weight_edu) |>
  filter(uttsyk1 < 3 & utdanning_kort < 97) |>
  mutate(pct = n / sum(n))
# --------------------------------------------------------------- #
# utvrist: Percentage that is exposed to violent shakes or pushes #
# --------------------------------------------------------------- #
# Each table sums weight_edu per response code, keeps codes below 97, then
# computes pct on the remaining rows (within each group where grouped).

# all
df |>
  count(utvrist, wt = weight_edu) |>
  filter(utvrist < 97) |>
  mutate(pct = n / sum(n))

# married
df |>
  mutate(gift = recode_married(gift)) |>
  group_by(gift) |>
  count(utvrist, wt = weight_edu) |>
  filter(utvrist < 97 & !is.na(gift)) |>
  mutate(pct = n / sum(n))

# immigration
df |>
  mutate(innvandret = recode_immigration(innvandret)) |>
  group_by(innvandret) |>
  count(utvrist, wt = weight_edu) |>
  filter(utvrist < 97 & !is.na(innvandret)) |>
  mutate(pct = n / sum(n))
# --------------------------------------------------------------------- #
# utvslag: Percentage that is exposed to hits with fist or hard objects #
# --------------------------------------------------------------------- #
# Each table sums weight_edu per response code, keeps codes below 97, then
# computes pct on the remaining rows (within each group where grouped).

# all
df |>
  count(utvslag, wt = weight_edu) |>
  filter(utvslag < 97) |>
  mutate(pct = n / sum(n))

# age
df |>
  group_by(aldersgruppe_vuttrekk_dsf) |>
  count(utvslag, wt = weight_edu) |>
  filter(utvslag < 97) |>
  mutate(pct = n / sum(n))

# gender
df |>
  group_by(kjonn_dsf) |>
  count(utvslag, wt = weight_edu) |>
  filter(utvslag < 97) |>
  mutate(pct = n / sum(n))
5.1.2 Python
# This script reproduces the statistics published at trygghetsundersokelsen.no
#
# Please confer the following site for how to access data from the National Crime Survey:
# https://trygghetsundersokelsen.no/data.html#en
#
# For data documentation: https://data.trygghetsundersokelsen.no
#
# Content:
# Frequencies for the whole population are calculated for all variables.
# But not every variable is broken down by every demographic variable.
# For each variable, the script applies three demographic variables.
# But all demographic variables are used at some point.
# The demographic variables are as follows:
#
# - Age
# - Gender
# - Education
# - Immigration
# - Married
# - Children
# - County
#
# **Weighting**: All frequencies are weighted using `weight_edu`. The weight is based on demographic variables (age, gender and geography) and education level.
#
# **DISCLAIMER**: OsloMet, ideas2evidence, Frischsenteret and the Ministry of Justice and Public Security do not take any responsibility for the use and interpretation of data from the National Crime Survey. The above mentioned further take no responsibility for any negative consequences that may arise as a result of the use of this data.
# Data import
import pandas as pd
import numpy as np
# path.txt holds the directory prefix of the data file; newlines are removed
# so that the prefix can be concatenated directly with the file name.
with open("path.txt", "r", encoding="utf8") as file:
    path = file.read().replace("\n", "")

df = pd.read_spss(path + "Norwegian Crime Survey NSD v1.sav")
# grouping features: the demographic columns used to break down frequencies
groups = [
    'aldersgruppe_vuttrekk_dsf',
    'kjonn_dsf',
    'utdanning_kort',
    'innvandret',
    'gift',
    'barn',
    'fylkenr_dsf',
]
# Recoding of features.
# All features used are recoded/mapped so that missing-categories are NaN. In addition, some response alternatives are combined/merged.
# Series.map yields NaN for every value that is not a key of the mapping, so
# any category left out of a dict below becomes missing.
immigration_mapping = {
    "No": "No",
    "I myself have immigrated to Norway": "Have immigrated to Norway",
    "Both my parents have immigrated to Norway, but I was born in Norway": "Born in Norway, but both or one of the parents immigrated",
    "Mother has immigrated to Norway, but my father and I have not immigrated": "Born in Norway, but both or one of the parents immigrated",
    "Father has immigrated to Norway, but my mother and I have not immigrated": "Born in Norway, but both or one of the parents immigrated",
    #"Not answered": NaN
}
df = df.assign(innvandret=df.innvandret.map(immigration_mapping))
df.innvandret.value_counts()

married_mapping = {
    "Yes, married/registered partner": "Married or cohabitant",
    "No, living alone": "Living alone",
    "Yes, cohabitant": "Married or cohabitant"
}
df = df.assign(gift=df.gift.map(married_mapping))
df.gift.value_counts()

# Identity mapping: keeps the three listed categories and turns the rest
# into NaN.
df['utdanning_kort'] = df['utdanning_kort'].map({
    "Higher education": "Higher education",
    "Vocational school / High school": "Vocational school / High school",
    "No education / Primary school": "No education / Primary school"
})
df.utdanning_kort.value_counts()

df = df.assign(barn=df.barn.map({
    "Yes": "Has children",
    "No": "Does not have children"
}))
df.barn.value_counts()
# Variable groups used by the recoding and analysis steps.
safety1 = ['urtrygg1', 'urtrygg3']
safety2 = ['urtrygg2']
worry = ['urkrim_1', 'urkrim_2', 'urkrim_5', 'urkrim_7', 'urkrim_8', 'urkrim_9', 'urkrim_10']
exposure = ['utsibilde1', 'utsiident1', 'uttbol1', 'uttlom1', 'uttran1', 'uttruss1', 'uttsyk1'] #utvrist, utvslag
# Recode safety variables
# Values that are not keys of a mapping become NaN (Series.map behaviour).
df['urtrygg1'] = df['urtrygg1'].map({
    'Very safe': 'Very safe',
    'Pretty safe': 'Pretty safe',
    'Pretty insecure': 'Pretty insecure',
    'Very insecure': 'Very insecure'
})

# The last two response alternatives are merged into one category.
df['urtrygg2'] = df['urtrygg2'].map({
    'Yes, I always plan so that I am as safe as possible': 'Always plan ahead',
    'Sometimes / it depends on the situation': 'Depends on the situation/Does not think about it',
    'No, I\'m not thinking about whether I could be the victim of a crime': 'Depends on the situation/Does not think about it'
})

df['urtrygg3'] = df['urtrygg3'].map({
    'To a large degree': 'To a large degree',
    'To some degree': 'To some degree',
    'To a small degree': 'To a small degree',
    'Not at all': 'Not at all'
})
# Recode urkrim-variables
# Identity mapping: the five listed answers are kept, everything else
# becomes NaN.
urkrim_mapping = {
    "Quite often": "Quite often",
    "Pretty often": "Pretty often",
    "Pretty rare": "Pretty rare",
    "Quite rare": "Quite rare",
    "Never": "Never"
}

for feature in worry:
    df[feature] = df[feature].map(urkrim_mapping)
# Recode exposure-variables
## utvslag and utvrist have a slightly different response scale
# NOTE(review): utvslag and utvrist are recoded here but are not part of the
# `exposure` list, so the exposure loops do not tabulate them -- confirm
# this is intended.
df['utvslag'] = df['utvslag'].map({
    "Yes, (number of times this happened to you in 2020)": "Yes",
    "No": "No"
})

df['utvrist'] = df['utvrist'].map({
    "Yes, (number of times this happened to you in 2020)": "Yes",
    "No": "No"
})

# same mapping for the rest of them
exposure_mapping = {
    "Yes, (number of times)": "Yes",
    "No": "No"
}

for feature in exposure:
    df[feature] = df[feature].map(exposure_mapping)
## Functions
#`weighted_frequency` calculates the frequency of `feature`, using the education weight `weight_edu`.
#`weighted_frequency_by` calculates the frequency of `feature` by `group`, using the education weight `weight_edu`.
def weighted_frequency(df, feature, print_ = False):
    """Weighted frequency table of `feature`.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the column `feature` and the weight column
        `weight_edu`.
    feature : str
        Name of the column to tabulate.
    print_ : bool, default False
        If true, the resulting table is printed before it is returned.

    Returns
    -------
    pandas.DataFrame
        One row per category of `feature` with the columns `feature`,
        `count` (sum of weights, rounded to whole numbers) and `pct`
        (`count` divided by the total of `count`).
    """
    # Sum of weights per category; groupby already names the index `feature`,
    # so reset_index turns it into a regular column of that name.
    x = (
        df.groupby([feature])['weight_edu']
        .sum()
        .to_frame()
        .reset_index()
        .rename(columns={'weight_edu': 'count'})
    )

    # remove decimals
    x = x.round(0)

    # calculate percentage (from the rounded counts)
    x['pct'] = x['count'] / x['count'].sum()

    if print_:
        print(x)
    return x
def weighted_frequency_by(df, group, feature, print_ = False):
    """Weighted frequency table of `feature` within each level of `group`.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns `group`, `feature` and the weight column
        `weight_edu`.
    group : str
        Name of the column that defines the groups.
    feature : str
        Name of the column to tabulate within each group.
    print_ : bool, default False
        If true, the resulting table is printed before it is returned.

    Returns
    -------
    pandas.DataFrame
        One row per (group, feature) combination with the columns `group`,
        `feature`, `count` (sum of weights, rounded to whole numbers),
        `group_count` (total `count` of the row's group) and `pct`
        (`count` divided by `group_count`).
    """
    # Sum of weights per (group, feature); both index levels are already
    # named by groupby, so reset_index turns them into regular columns.
    x = (
        df.groupby([group, feature])['weight_edu']
        .sum()
        .to_frame()
        .reset_index()
        .rename(columns={'weight_edu': 'count'})
    )

    # remove decimals
    x = x.round(0)

    # Calculate group count and percent per group.
    # Only the `count` column is summed: transforming the whole frame would
    # also return `feature` when it is numeric, and a two-column result
    # cannot be assigned to the single column `group_count`.
    x['group_count'] = x.groupby([group])['count'].transform('sum')
    x['pct'] = x['count'] / x['group_count']

    if print_:
        print(x)
    return x
## Analysis
# In the following, the data visualized on www.trygghetsundersokelsen.no is reproduced.
# The calls below are bare expressions (print_ is left at False), so their
# tables are only shown where the environment displays expression results.

# ------------------------------------------------------------------------------------------------ #
# urtrygg1: Percentage that feels unsafe if they go out alone at night in the area where they live #
# ------------------------------------------------------------------------------------------------ #
weighted_frequency(df, 'urtrygg1')
weighted_frequency_by(df, 'utdanning_kort', 'urtrygg1')
weighted_frequency_by(df, 'aldersgruppe_vuttrekk_dsf', 'urtrygg1')
weighted_frequency_by(df, 'fylkenr_dsf', 'urtrygg1')

# ------------------------------------------------------------------- #
# urtrygg2: Percentage that always plan ahead to avoid crime exposure #
# ------------------------------------------------------------------- #
weighted_frequency(df, 'urtrygg2')
weighted_frequency_by(df, 'barn', 'urtrygg2')
weighted_frequency_by(df, 'kjonn_dsf', 'urtrygg2')
weighted_frequency_by(df, 'fylkenr_dsf', 'urtrygg2')

# ----------------------------------------------------------------- #
# urtrygg3: Percentage that is worried about being exposed to crime #
# ----------------------------------------------------------------- #
weighted_frequency(df, 'urtrygg3')
weighted_frequency_by(df, 'innvandret', 'urtrygg3')
weighted_frequency_by(df, 'gift', 'urtrygg3')

# ------------------------------------------------------------ #
# urkrim_1: Percentage that worries about exposure to burglary #
# ------------------------------------------------------------ #
weighted_frequency(df, 'urkrim_1')
weighted_frequency_by(df, 'barn', 'urkrim_1')
# NOTE(review): the next call tabulates urkrim_2 inside the urkrim_1
# section -- possibly a copy-paste slip; confirm the intended
# group/feature pair.
weighted_frequency_by(df, 'barn', 'urkrim_2')
# ------- #
# Worries #
# ------- #
# print_=True so the tables are shown: inside a loop the return value is
# otherwise discarded, and the other loops below print their tables.
for feature in worry:
    weighted_frequency(df, feature, print_=True)

# -------------------------------------- #
# Worries by demographic characteristics #
# -------------------------------------- #
for feature in worry:
    for group in groups:
        weighted_frequency_by(df, group, feature, print_=True)

# -------- #
# Exposure #
# -------- #
for feature in exposure:
    weighted_frequency(df, feature, print_=True)

# --------------------------------------- #
# Exposure by demographic characteristics #
# --------------------------------------- #
for feature in exposure:
    for group in groups:
        weighted_frequency_by(df, group, feature, print_=True)