# pip install ydf temporian -U
import ydf
import temporian as tp
import pandas as pd
import datetime
import os
What are time sequences?¶
Time-sequences or Event-sequences are time-stamped events with feature-data that are not uniformly sampled. There are many examples for this type of data: banking transaction logs, sales logs, network logs, patient measures, treatment logs and many others. Decision Forests are well-suited for handling event sequences because the output of preprocessing event sequences is tabular and can be sparse and noisy.
YDF models benefit greatly from feature preprocessing when working with temporal data. Basic operations can be handled with generic data processing tools like Pandas. For more complex use cases, temporal feature preprocessing tools are very helpful. This tutorial demonstrates how to integrate YDF with the Temporian data preprocessing library.
In this notebook, our goal is to determine if transactions from the Reproducible Machine Learning for Credit Card Fraud Detection dataset are fraudulent. Each transaction is an event with an amount, client ID, and terminal ID. Our model will consider the transaction features as well as related transactions (e.g., transactions from the same client and transactions from the same terminal). For that, we will pre-process the event data such that the YDF model can detect frauds.
Note: This tutorial is a simplified adaptation of the Detecting payment card fraud with Temporian and TensorFlow Decision Forests tutorial.
Note2: Yggdrasil Decision Forests and Temporian share part of their development team.
Download dataset¶
Transaction data is available as pickled Pandas DataFrames (`.pkl` files), with one file for each day between April 1, 2018, and September 30, 2018. We use transactions from April 1, 2018, to August 31, 2018 (inclusive) for training, and transactions from September 1, 2018, to September 30, 2018, for evaluation.
start_date = datetime.date(2018, 4, 1)
end_date = datetime.date(2018, 9, 30)
train_test_split = datetime.datetime(2018, 9, 1)

# List the input files: one pickled DataFrame ("<YYYY-MM-DD>.pkl") per day,
# from start_date to end_date inclusive. The number of days is computed up
# front so that start_date is not modified and stays usable afterwards.
num_days = (end_date - start_date).days + 1
filenames = [
    f"{start_date + datetime.timedelta(days=offset)}.pkl"
    for offset in range(num_days)
]
print(f"{len(filenames)} dates")
# Download and load files
print("Loading files (this step can take a minute)")


def load_date(idx_and_filename):
    """Download and unpickle the transactions of a single day.

    Args:
        idx_and_filename: An `(index, filename)` pair, as produced by
            `enumerate(filenames)`. The index is only used for the progress
            display.

    Returns:
        The object returned by `pd.read_pickle` for the requested file.
    """
    idx, filename = idx_and_filename[0], idx_and_filename[1]
    # Print a progress marker every 10 files.
    if idx % 10 == 0:
        print(f"[{idx}/{len(filenames)}]", end="", flush=True)
    # SECURITY NOTE: unpickling can execute arbitrary code. This is acceptable
    # here only because the files come from the dataset's own repository; do
    # not reuse this pattern with untrusted sources.
    return pd.read_pickle(f"https://github.com/Fraud-Detection-Handbook/simulated-data-raw/raw/main/data/{filename}")


# Load the daily files in order and stack them into a single DataFrame.
transactions_pd = pd.concat(map(load_date, enumerate(filenames)))
print("Done!")
print(f"Found {len(transactions_pd)} transactions")
183 dates Loading files (this step can take a minute) [0/183][10/183][20/183][30/183][40/183][50/183][60/183][70/183][80/183][90/183][100/183][110/183][120/183][130/183][140/183][150/183][160/183][170/183][180/183]Done! Found 1754155 transactions
# Display the concatenated transactions DataFrame (notebook cell output).
transactions_pd
TRANSACTION_ID | TX_DATETIME | CUSTOMER_ID | TERMINAL_ID | TX_AMOUNT | TX_TIME_SECONDS | TX_TIME_DAYS | TX_FRAUD | TX_FRAUD_SCENARIO | |
---|---|---|---|---|---|---|---|---|---|
0 | 0 | 2018-04-01 00:00:31 | 596 | 3156 | 57.16 | 31 | 0 | 0 | 0 |
1 | 1 | 2018-04-01 00:02:10 | 4961 | 3412 | 81.51 | 130 | 0 | 0 | 0 |
2 | 2 | 2018-04-01 00:07:56 | 2 | 1365 | 146.00 | 476 | 0 | 0 | 0 |
3 | 3 | 2018-04-01 00:09:29 | 4128 | 8737 | 64.49 | 569 | 0 | 0 | 0 |
4 | 4 | 2018-04-01 00:10:34 | 927 | 9906 | 50.99 | 634 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1754150 | 1754150 | 2018-09-30 23:56:36 | 161 | 655 | 54.24 | 15810996 | 182 | 0 | 0 |
1754151 | 1754151 | 2018-09-30 23:57:38 | 4342 | 6181 | 1.23 | 15811058 | 182 | 0 | 0 |
1754152 | 1754152 | 2018-09-30 23:58:21 | 618 | 1502 | 6.62 | 15811101 | 182 | 0 | 0 |
1754153 | 1754153 | 2018-09-30 23:59:52 | 4056 | 3067 | 55.40 | 15811192 | 182 | 0 | 0 |
1754154 | 1754154 | 2018-09-30 23:59:57 | 3542 | 9849 | 23.59 | 15811197 | 182 | 0 | 0 |
1754155 rows × 9 columns
TX_FRAUD indicates if a transaction is a fraud. This is the label.
Prepare data¶
The data we have is currently stored in a Pandas DataFrame. We want to convert it to a Temporian EventSet so that we can apply preprocessing.
# Keep only the timestamp, the customer / terminal ids, the amount and the
# fraud label, then convert the DataFrame into a Temporian EventSet whose
# timestamps are read from the "TX_DATETIME" column.
event_columns = ["TX_DATETIME", "CUSTOMER_ID", "TERMINAL_ID", "TX_AMOUNT", "TX_FRAUD"]
transactions_es = tp.from_pandas(
    transactions_pd[event_columns],
    timestamps="TX_DATETIME",
)
transactions_es
WARNING:root:Feature "CUSTOMER_ID" is an array of numpy.object_ and will be casted to numpy.string_ (Note: numpy.string_ is equivalent to numpy.bytes_). WARNING:root:Feature "TERMINAL_ID" is an array of numpy.object_ and will be casted to numpy.string_ (Note: numpy.string_ is equivalent to numpy.bytes_).
timestamp | CUSTOMER_ID | TERMINAL_ID | TX_AMOUNT | TX_FRAUD |
---|---|---|---|---|
2018-04-01 00:00:31+00:00 | 596 | 3156 | 57.16 | 0 |
2018-04-01 00:02:10+00:00 | 4961 | 3412 | 81.51 | 0 |
2018-04-01 00:07:56+00:00 | 2 | 1365 | 146 | 0 |
2018-04-01 00:09:29+00:00 | 4128 | 8737 | 64.49 | 0 |
2018-04-01 00:10:34+00:00 | 927 | 9906 | 50.99 | 0 |
… | … | … | … | … |
Let's plot the transactions of CUSTOMER_ID=3774. We can see a few frauds.
# Select the events whose CUSTOMER_ID equals "3774" and plot them.
customer_ids = transactions_es["CUSTOMER_ID"]
is_customer_3774 = tp.equal(customer_ids, "3774")
transactions_es.filter(is_customer_3774).plot()
In this dataset, it takes one week to determine if a transaction is fraudulent. For each transaction, we will calculate the number of fraudulent transactions in the same terminal in the last 1 to 4 weeks. We will also calculate the number of transactions and the sum of transactions in the last day, week, and four weeks for this terminal. Finally, we will extract the hour of the day and the day of the week for each transaction.
# @tp.compile # Uncomment tp.compile to run faster
def extract_features(transactions: tp.types.EventSetOrNode) -> tp.types.EventSetOrNode:
    """Augment the transactions with per-terminal and calendar features.

    Args:
        transactions: Transactions with (at least) the "TERMINAL_ID",
            "TX_AMOUNT" and "TX_FRAUD" features.

    Returns:
        The input transactions glued with the computed features. The
        "TERMINAL_ID" index added for the computation is dropped again before
        returning.
    """
    per_terminal_features = []

    # Index the transactions per terminal
    per_terminal = transactions.add_index("TERMINAL_ID")

    # 1. NUM PREVIOUS FRAUDS
    # Lag the frauds by 1 week: a fraud label is only known one week after
    # the transaction, so fresher labels must not leak into the features.
    lagged_fraud_per_terminal = per_terminal["TX_FRAUD"].lag(tp.duration.weeks(1))

    # Moving sum of the lagged frauds over a 4-week window, evaluated at each
    # transaction of the terminal.
    per_terminal_features.append(
        lagged_fraud_per_terminal
        .moving_sum(tp.duration.weeks(4), sampling=per_terminal)
        .rename("per_terminal.moving_sum_frauds"))

    # 2. NUM PAST TRANSACTIONS AND AMOUNT
    # Count and total amount of the terminal's transactions over the last
    # day, week and four weeks.
    for day in [1, 7, 4*7]:
        per_terminal_features.append(
            per_terminal
            .moving_count(tp.duration.days(day))
            .rename(f"per_terminal.moving_count_transaction_{day}d"))

        per_terminal_features.append(
            per_terminal["TX_AMOUNT"]
            .moving_sum(tp.duration.days(day))
            .rename(f"per_terminal.moving_sum_transaction_{day}d"))

    # 3. CALENDAR FEATURES
    # Hour of day and day of week of each transaction.
    per_terminal_features.append(per_terminal.calendar_hour())
    per_terminal_features.append(per_terminal.calendar_day_of_week())

    # Aggregate the raw data and features
    return tp.glue(
        per_terminal,
        *per_terminal_features).drop_index("TERMINAL_ID")


feature_transactions_es = extract_features(transactions_es)
feature_transactions_es
timestamp | CUSTOMER_ID | TX_AMOUNT | TX_FRAUD | per_terminal.moving_sum_frauds | per_terminal.moving_count_transaction_1d | per_terminal.moving_sum_transaction_1d | per_terminal.moving_count_transaction_7d | per_terminal.moving_sum_transaction_7d | per_terminal.moving_count_transaction_28d | per_terminal.moving_sum_transaction_28d | calendar_hour | calendar_day_of_week | TERMINAL_ID |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2018-04-01 00:00:31+00:00 | 596 | 57.16 | 0 | 0 | 1 | 57.16 | 1 | 57.16 | 1 | 57.16 | 0 | 6 | 3156 |
2018-04-01 00:02:10+00:00 | 4961 | 81.51 | 0 | 0 | 1 | 81.51 | 1 | 81.51 | 1 | 81.51 | 0 | 6 | 3412 |
2018-04-01 00:07:56+00:00 | 2 | 146 | 0 | 0 | 1 | 146 | 1 | 146 | 1 | 146 | 0 | 6 | 1365 |
2018-04-01 00:09:29+00:00 | 4128 | 64.49 | 0 | 0 | 1 | 64.49 | 1 | 64.49 | 1 | 64.49 | 0 | 6 | 8737 |
2018-04-01 00:10:34+00:00 | 927 | 50.99 | 0 | 0 | 1 | 50.99 | 1 | 50.99 | 1 | 50.99 | 0 | 6 | 9906 |
… | … | … | … | … | … | … | … | … | … | … | … | … | … |
We plot the features for our favorite customer:
# Select the feature rows whose CUSTOMER_ID equals "3774" and plot them.
feature_customer_ids = feature_transactions_es["CUSTOMER_ID"]
is_feature_customer_3774 = tp.equal(feature_customer_ids, "3774")
feature_transactions_es.filter(is_feature_customer_3774).plot()
Train and evaluate model¶
We split the dataset between training and testing, convert the Temporian EventSet back to a Pandas DataFrame, and train a YDF model.
# The EventSet timestamps are expressed in UTC, whereas `train_test_split` is
# a naive datetime: calling `.timestamp()` on it directly would interpret it
# in the machine's local timezone and shift the split boundary. Pin it to UTC.
if train_test_split.tzinfo is None:
    split_utc = train_test_split.replace(tzinfo=datetime.timezone.utc)
else:
    split_utc = train_test_split

# Transactions strictly before the split go to training, the rest to testing.
is_train = feature_transactions_es.timestamps() < split_utc.timestamp()
is_test = ~is_train

train_feature_transactions_es = feature_transactions_es.filter(is_train)
test_feature_transactions_es = feature_transactions_es.filter(is_test)

print(f"{train_feature_transactions_es.num_events()} training transactions")
print(f"{test_feature_transactions_es.num_events()} testing transactions")
1466091 training transactions 288064 testing transactions
# Convert both EventSets back to Pandas DataFrames, the format given to YDF
# below.
train_ds_pd, test_ds_pd = (
    tp.to_pandas(split_es)
    for split_es in (train_feature_transactions_es, test_feature_transactions_es)
)

print(f"{len(train_ds_pd)} training examples")
print(f"{len(test_ds_pd)} testing examples")
1466091 training examples 288064 testing examples
# Display the training DataFrame (notebook cell output).
train_ds_pd
CUSTOMER_ID | TX_AMOUNT | TX_FRAUD | per_terminal.moving_sum_frauds | per_terminal.moving_count_transaction_1d | per_terminal.moving_sum_transaction_1d | per_terminal.moving_count_transaction_7d | per_terminal.moving_sum_transaction_7d | per_terminal.moving_count_transaction_28d | per_terminal.moving_sum_transaction_28d | calendar_hour | calendar_day_of_week | TERMINAL_ID | timestamp | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 596 | 57.16 | 0 | 0 | 1 | 57.16 | 1 | 57.16 | 1 | 57.16 | 0 | 6 | 3156 | 1.522541e+09 |
1 | 4961 | 81.51 | 0 | 0 | 1 | 81.51 | 1 | 81.51 | 1 | 81.51 | 0 | 6 | 3412 | 1.522541e+09 |
2 | 2 | 146.00 | 0 | 0 | 1 | 146.00 | 1 | 146.00 | 1 | 146.00 | 0 | 6 | 1365 | 1.522541e+09 |
3 | 4128 | 64.49 | 0 | 0 | 1 | 64.49 | 1 | 64.49 | 1 | 64.49 | 0 | 6 | 8737 | 1.522541e+09 |
4 | 927 | 50.99 | 0 | 0 | 1 | 50.99 | 1 | 50.99 | 1 | 50.99 | 0 | 6 | 9906 | 1.522541e+09 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1466086 | 1321 | 79.06 | 0 | 0 | 2 | 123.09 | 8 | 342.55 | 26 | 941.59 | 21 | 4 | 229 | 1.535753e+09 |
1466087 | 4202 | 13.53 | 0 | 0 | 3 | 216.03 | 5 | 315.35 | 24 | 1233.40 | 21 | 4 | 5559 | 1.535753e+09 |
1466088 | 1879 | 19.64 | 0 | 0 | 1 | 19.64 | 7 | 274.47 | 26 | 1046.89 | 21 | 4 | 601 | 1.535753e+09 |
1466089 | 1864 | 73.29 | 0 | 0 | 4 | 192.22 | 13 | 598.69 | 25 | 1106.18 | 21 | 4 | 4571 | 1.535753e+09 |
1466090 | 409 | 56.21 | 0 | 0 | 2 | 145.79 | 8 | 568.48 | 33 | 1959.01 | 21 | 4 | 8457 | 1.535753e+09 |
1466091 rows × 14 columns
# Input features given to the model: the per-terminal fraud and transaction
# counts plus the calendar features. "TX_FRAUD" is the label.
model_features = [
    "per_terminal.moving_sum_frauds",
    "per_terminal.moving_count_transaction_1d",
    "per_terminal.moving_count_transaction_7d",
    "per_terminal.moving_count_transaction_28d",
    "calendar_hour",
    "calendar_day_of_week",
]
learner = ydf.GradientBoostedTreesLearner(
    label="TX_FRAUD",
    features=model_features,
    num_trees=100,  # Speed-up the training
)
model = learner.train(train_ds_pd)
Train model on 1466091 examples Model trained in 0:00:40.887605
We can evaluate the model on the test dataset.
# Evaluate the trained model on the test DataFrame; the result is displayed
# as the notebook cell output.
model.evaluate(test_ds_pd)
Label \ Pred | 0 | 1 |
---|---|---|
0 | 284702 | 813 |
1 | 1770 | 779 |
Understand model¶
Although the model does not operate on the raw transaction data directly, but rather on the preprocessed features, it can be interpreted. For example, examining the variable importance, we can see that the feature per_terminal.moving_sum_frauds is the most important to the model while the calendar_day_of_week does not matter much. We can interpret this as meaning that if a terminal has had past fraudulent transactions, it is a good indicator of future fraudulent transactions on that terminal.
# Display the model description (notebook cell output).
model.describe()
Task : CLASSIFICATION
Label : TX_FRAUD
Features (6) : per_terminal.moving_sum_frauds per_terminal.moving_count_transaction_1d per_terminal.moving_count_transaction_7d per_terminal.moving_count_transaction_28d calendar_hour calendar_day_of_week
Weights : None
Trained with tuner : No
Model size : 1406 kB
Number of records: 1466091 Number of columns: 7 Number of columns by type: NUMERICAL: 6 (85.7143%) CATEGORICAL: 1 (14.2857%) Columns: NUMERICAL: 6 (85.7143%) 0: "per_terminal.moving_sum_frauds" NUMERICAL mean:0.197974 min:0 max:53 sd:1.59577 1: "per_terminal.moving_count_transaction_1d" NUMERICAL mean:1.99715 min:1 max:12 sd:1.02114 2: "per_terminal.moving_count_transaction_7d" NUMERICAL mean:7.84226 min:1 max:28 sd:3.1002 3: "per_terminal.moving_count_transaction_28d" NUMERICAL mean:26.4539 min:1 max:79 sd:9.81157 4: "calendar_hour" NUMERICAL mean:11.4991 min:0 max:23 sd:5.05594 5: "calendar_day_of_week" NUMERICAL mean:2.9861 min:0 max:6 sd:2.00096 CATEGORICAL: 1 (14.2857%) 6: "TX_FRAUD" CATEGORICAL has-dict vocab-size:3 zero-ood-items most-frequent:"0" 1453959 (99.1725%) Terminology: nas: Number of non-available (i.e. missing) values. ood: Out of dictionary. manually-defined: Attribute whose type is manually defined by the user, i.e., the type was not automatically inferred. tokenized: The attribute value is obtained through tokenization. has-dict: The attribute is attached to a string dictionary e.g. a categorical attribute stored as a string. vocab-size: Number of unique values.
The following evaluation is computed on the validation or out-of-bag dataset.
Task: CLASSIFICATION Label: TX_FRAUD Loss (BINOMIAL_LOG_LIKELIHOOD): 0.0692174 Accuracy: 0.99269 CI95[W][0 1] ErrorRate: : 0.00730968 Confusion Table: truth\prediction 0 1 0 145251 236 1 836 332 Total: 146655
Variable importances measure the importance of an input feature for a model.
1. "per_terminal.moving_sum_frauds" 0.900000 ################ 2. "per_terminal.moving_count_transaction_28d" 0.290564 ## 3. "per_terminal.moving_count_transaction_7d" 0.215782 4. "calendar_hour" 0.190089 5. "calendar_day_of_week" 0.188707 6. "per_terminal.moving_count_transaction_1d" 0.180135
1. "per_terminal.moving_sum_frauds" 88.000000 ################ 2. "per_terminal.moving_count_transaction_28d" 8.000000 # 3. "per_terminal.moving_count_transaction_7d" 2.000000 4. "calendar_day_of_week" 1.000000
1. "per_terminal.moving_count_transaction_28d" 859.000000 ################ 2. "per_terminal.moving_sum_frauds" 824.000000 ############### 3. "per_terminal.moving_count_transaction_7d" 525.000000 ######## 4. "calendar_hour" 306.000000 ### 5. "calendar_day_of_week" 214.000000 # 6. "per_terminal.moving_count_transaction_1d" 132.000000
1. "per_terminal.moving_sum_frauds" 2574.658942 ################ 2. "per_terminal.moving_count_transaction_28d" 692.886033 #### 3. "per_terminal.moving_count_transaction_7d" 305.556677 # 4. "calendar_hour" 83.875978 5. "calendar_day_of_week" 79.349436 6. "per_terminal.moving_count_transaction_1d" 34.315195
Those variable importances are computed during training. More, and possibly more informative, variable importances are available when analyzing a model on a test dataset.
Only printing the first tree.
Tree #0: "per_terminal.moving_sum_frauds">=2.5 [s:0.00122566 n:1319436 np:10761 miss:0] ; pred:3.22407e-10 ├─(pos)─ "per_terminal.moving_sum_frauds">=18.5 [s:0.0176001 n:10761 np:3536 miss:0] ; pred:4.68509 | ├─(pos)─ "per_terminal.moving_count_transaction_28d">=29.5 [s:0.00771428 n:3536 np:2178 miss:0] ; pred:2.38384 | | ├─(pos)─ "per_terminal.moving_sum_frauds">=24.5 [s:0.0209201 n:2178 np:1388 miss:0] ; pred:3.22545 | | | ├─(pos)─ "per_terminal.moving_count_transaction_28d">=37.5 [s:0.00556346 n:1388 np:633 miss:0] ; pred:1.90128 | | | | ├─(pos)─ pred:2.8898 | | | | └─(neg)─ pred:1.07249 | | | └─(neg)─ "per_terminal.moving_count_transaction_28d">=36.5 [s:0.0096319 n:790 np:206 miss:0] ; pred:5 | | | ├─(pos)─ pred:5 | | | └─(neg)─ pred:4.84463 | | └─(neg)─ "per_terminal.moving_sum_frauds">=22.5 [s:0.00322549 n:1358 np:543 miss:0] ; pred:1.03403 | | ├─(pos)─ "per_terminal.moving_sum_frauds">=23.5 [s:0.000536586 n:543 np:399 miss:0] ; pred:0.189689 | | | ├─(pos)─ pred:0.0208172 | | | └─(neg)─ pred:0.657606 | | └─(neg)─ "per_terminal.moving_count_transaction_28d">=25.5 [s:0.00624813 n:815 np:378 miss:1] ; pred:1.59659 | | ├─(pos)─ pred:2.62795 | | └─(neg)─ pred:0.704466 | └─(neg)─ "per_terminal.moving_sum_frauds">=3.5 [s:0.00906797 n:7225 np:6188 miss:0] ; pred:5 | ├─(pos)─ "per_terminal.moving_sum_frauds">=9.5 [s:0.0036483 n:6188 np:3617 miss:0] ; pred:5 | | ├─(pos)─ "per_terminal.moving_count_transaction_28d">=22.5 [s:0.0138705 n:3617 np:2748 miss:1] ; pred:5 | | | ├─(pos)─ pred:5 | | | └─(neg)─ pred:3.12495 | | └─(neg)─ "per_terminal.moving_count_transaction_28d">=27.5 [s:0.00442462 n:2571 np:1083 miss:0] ; pred:5 | | ├─(pos)─ pred:5 | | └─(neg)─ pred:5 | └─(neg)─ "per_terminal.moving_count_transaction_28d">=28.5 [s:0.0251879 n:1037 np:616 miss:0] ; pred:2.98852 | ├─(pos)─ "per_terminal.moving_count_transaction_28d">=47.5 [s:0.00377678 n:616 np:147 miss:0] ; pred:1.39635 | | ├─(pos)─ pred:0.0642654 | | └─(neg)─ pred:1.81387 | └─(neg)─ 
"per_terminal.moving_count_transaction_28d">=22.5 [s:0.0124898 n:421 np:251 miss:1] ; pred:5 | ├─(pos)─ pred:4.20204 | └─(neg)─ pred:5 └─(neg)─ "per_terminal.moving_sum_frauds">=1.5 [s:6.26323e-06 n:1308675 np:7935 miss:0] ; pred:-0.0385246 ├─(pos)─ "per_terminal.moving_count_transaction_28d">=11.5 [s:0.000800429 n:7935 np:7915 miss:1] ; pred:0.350309 | ├─(pos)─ "per_terminal.moving_count_transaction_28d">=15.5 [s:0.000222715 n:7915 np:7832 miss:1] ; pred:0.333051 | | ├─(pos)─ "per_terminal.moving_count_transaction_28d">=33.5 [s:0.000176741 n:7832 np:2934 miss:0] ; pred:0.314408 | | | ├─(pos)─ pred:0.105963 | | | └─(neg)─ pred:0.43927 | | └─(neg)─ "per_terminal.moving_count_transaction_7d">=9.5 [s:0.0112688 n:83 np:5 miss:0] ; pred:2.09225 | | ├─(pos)─ pred:5 | | └─(neg)─ pred:1.7661 | └─(neg)─ "per_terminal.moving_count_transaction_28d">=9.5 [s:0.0533333 n:20 np:15 miss:1] ; pred:5 | ├─(pos)─ "calendar_hour">=13.5 [s:0.0355556 n:15 np:5 miss:0] ; pred:5 | | ├─(pos)─ pred:2.32618 | | └─(neg)─ pred:5 | └─(neg)─ pred:5 └─(neg)─ "per_terminal.moving_sum_frauds">=0.5 [s:6.94014e-07 n:1300740 np:84741 miss:0] ; pred:-0.0408967 ├─(pos)─ "per_terminal.moving_count_transaction_28d">=23.5 [s:5.77336e-06 n:84741 np:68762 miss:1] ; pred:-0.00260123 | ├─(pos)─ "per_terminal.moving_count_transaction_28d">=31.5 [s:7.97811e-07 n:68762 np:35992 miss:0] ; pred:-0.0166571 | | ├─(pos)─ pred:-0.0269997 | | └─(neg)─ pred:-0.00529765 | └─(neg)─ "per_terminal.moving_count_transaction_7d">=13.5 [s:2.81166e-05 n:15979 np:8 miss:0] ; pred:0.0578851 | ├─(pos)─ pred:2.93294 | └─(neg)─ pred:0.0564449 └─(neg)─ "per_terminal.moving_count_transaction_28d">=10.5 [s:3.48456e-07 n:1215999 np:1124099 miss:1] ; pred:-0.0435654 ├─(pos)─ "per_terminal.moving_count_transaction_28d">=20.5 [s:3.55088e-08 n:1124099 np:910721 miss:1] ; pred:-0.0415172 | ├─(pos)─ pred:-0.0404104 | └─(neg)─ pred:-0.0462414 └─(neg)─ "per_terminal.moving_count_transaction_28d">=3.5 [s:6.14969e-07 n:91900 np:64839 miss:1] ; 
pred:-0.0686185 ├─(pos)─ pred:-0.0624707 └─(neg)─ pred:-0.083349
For example, if a terminal has been compromised, we can look at the model's partial dependence plot (PDP) to see the relationship between the number of frauds recorded on that terminal in the previous weeks and the probability that a transaction is fraudulent.
# Analyze the model on the test DataFrame.
# NOTE(review): sampling=0.1 presumably restricts the analysis to a 10%
# sample of the examples to keep it fast -- confirm against the YDF docs.
model.analyze(test_ds_pd, sampling=0.1)
Variable importances measure the importance of an input feature for a model.
1. "per_terminal.moving_sum_frauds" 0.005308 ################ 2. "per_terminal.moving_count_transaction_28d" 0.001118 ### 3. "per_terminal.moving_count_transaction_7d" 0.000358 # 4. "calendar_hour" 0.000000 5. "per_terminal.moving_count_transaction_1d" -0.000007 6. "calendar_day_of_week" -0.000007
1. "per_terminal.moving_sum_frauds" 0.207669 ################ 2. "per_terminal.moving_count_transaction_28d" 0.058980 #### 3. "per_terminal.moving_count_transaction_7d" 0.023943 # 4. "calendar_hour" 0.000144 5. "calendar_day_of_week" -0.000243 6. "per_terminal.moving_count_transaction_1d" -0.001815
1. "per_terminal.moving_sum_frauds" 0.231211 ################ 2. "per_terminal.moving_count_transaction_28d" 0.015302 # 3. "calendar_hour" 0.001647 4. "per_terminal.moving_count_transaction_7d" 0.001499 5. "per_terminal.moving_count_transaction_1d" 0.000363 6. "calendar_day_of_week" -0.000701
1. "per_terminal.moving_sum_frauds" 0.207691 ################ 2. "per_terminal.moving_count_transaction_28d" 0.059061 #### 3. "per_terminal.moving_count_transaction_7d" 0.023948 # 4. "calendar_hour" 0.000115 5. "calendar_day_of_week" -0.000204 6. "per_terminal.moving_count_transaction_1d" -0.001902
1. "per_terminal.moving_sum_frauds" 0.900000 ################ 2. "per_terminal.moving_count_transaction_28d" 0.290564 ## 3. "per_terminal.moving_count_transaction_7d" 0.215782 4. "calendar_hour" 0.190089 5. "calendar_day_of_week" 0.188707 6. "per_terminal.moving_count_transaction_1d" 0.180135
1. "per_terminal.moving_sum_frauds" 88.000000 ################ 2. "per_terminal.moving_count_transaction_28d" 8.000000 # 3. "per_terminal.moving_count_transaction_7d" 2.000000 4. "calendar_day_of_week" 1.000000
1. "per_terminal.moving_count_transaction_28d" 859.000000 ################ 2. "per_terminal.moving_sum_frauds" 824.000000 ############### 3. "per_terminal.moving_count_transaction_7d" 525.000000 ######## 4. "calendar_hour" 306.000000 ### 5. "calendar_day_of_week" 214.000000 # 6. "per_terminal.moving_count_transaction_1d" 132.000000
1. "per_terminal.moving_sum_frauds" 2574.658942 ################ 2. "per_terminal.moving_count_transaction_28d" 692.886033 #### 3. "per_terminal.moving_count_transaction_7d" 305.556677 # 4. "calendar_hour" 83.875978 5. "calendar_day_of_week" 79.349436 6. "per_terminal.moving_count_transaction_1d" 34.315195