Local Setup: Using the Dataset with MySQL or MariaDB
This page explains how to set up the MIMIC-IV-Ext-CEKG dataset locally on your computer. However, due to the large volume of the dataset, using Google Cloud is recommended. To use the dataset locally, ensure that:
- A local RDBMS (such as MySQL or MariaDB) is installed on your computer.
- A local database administration tool (such as phpMyAdmin, DataGrip, etc.) is installed and configured.
- Fulfill all the steps for dataset access.
Once your local environment is ready, follow the steps provided to import and use the dataset for simulation, analysis, or application development.
Step1. Download the dataset files:
Go to the dataset paper page and download the dataset from the bottom of the page.
Step2. Unzip each required table.
Step3. Create a database named:
CREATE SCHEMA MimicCEKG;
Step 4: Import the Tables
Execute the following queries to import the corresponding table into your local database administration tool.
Table: EventLog
CREATE TABLE MimicCEKG.B_EventLog (
Event_ID VARCHAR(255),
Timestamps DATETIME,
Activity VARCHAR(255),
Activity_Synonym VARCHAR(255),
Activity_Attributes_ID VARCHAR(255),
Activity_Instance_ID VARCHAR(255)
Entity1_Origin VARCHAR(255),
Entity1_ID INT,
Entity2_Origin VARCHAR(255),
Entity2_ID INT,
Entity3_Origin VARCHAR(255),
Entity3_ID INT,
Entity4_Origin VARCHAR(255),
Entity4_ID INT,
Entity5_Origin VARCHAR(255),
Entity5_ID INT,
temp_patient_id INT,
temp_encounter_id VARCHAR(255)
);
LOAD DATA LOCAL INFILE '/path/to/B_EventLog.csv'
INTO TABLE B_EventLog
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 ROWS
(Event_ID, Timestamps, Activity, Activity_Synonym, Activity_Attributes_ID, Activity_Instance_ID, Entity1_Origin, Entity1_ID, Entity2_Origin, Entity2_ID, Entity3_Origin, Entity3_ID, Entity4_Origin, Entity4_ID, Entity5_Origin, Entity5_ID, temp_patient_id, temp_encounter_id);
Table: EntitiesAttributes
CREATE TABLE MimicCEKG.C_EntitiesAttributes (
Origin VARCHAR(255),
ID INT,
Name VARCHAR(255),
Value VARCHAR(255),
Category VARCHAR(255),
temp_patient_id INT,
temp_encounter_id INT
);
LOAD DATA LOCAL INFILE '/path/to/C_EntitiesAttributes.csv'
INTO TABLE C_EntitiesAttributes
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 ROWS
(Origin, ID, Name, Value, Category, temp_patient_id, temp_encounter_id);
Table: EntitiesAttributeRel
CREATE TABLE MimicCEKG.D_EntitiesAttributeRel (
Origin1 VARCHAR(255),
ID1 VARCHAR(255),
Origin2 VARCHAR(255),
ID2 VARCHAR(255),
temp_patient_id INT,
temp_encounter_id VARCHAR(255)
);
LOAD DATA LOCAL INFILE '/path/to/D_EntitiesAttributeRel.csv'
INTO TABLE D_EntitiesAttributeRel
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 ROWS
(Origin1, ID1, Origin2, ID2, temp_patient_id, temp_encounter_id);
Table: ActivityAttributes
CREATE TABLE MimicCEKG.E_ActivityAttributes (
Activity_Attributes_ID VARCHAR(255),
Activity VARCHAR(255),
Activity_Synonym VARCHAR(255),
Activity_Attribute VARCHAR(255),
Activity_Attribute_Value VARCHAR(255),
temp_patient_id INT,
temp_encounter_id VARCHAR(255)
);
LOAD DATA LOCAL INFILE '/path/to/E_ActivityAttributes.csv'
INTO TABLE E_ActivityAttributes
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 ROWS
(Activity_Attributes_ID, Activity, Activity_Synonym, Activity_Attribute, Activity_Attribute_Value, temp_patient_id, temp_encounter_id);
Table: ActivitiesDomain
CREATE TABLE MimicCEKG.F_ActivitiesDomain (
Activity_Domain VARCHAR(255)
);
LOAD DATA LOCAL INFILE '/path/to/F_ActivitiesDomain.csv'
INTO TABLE F_ActivitiesDomain
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 ROWS
(Activity_Domain);
Table: ICD
CREATE TABLE MimicCEKG.G_ICD (
ICD_Origin VARCHAR(255),
ICD_Code VARCHAR(255),
ICD_Version VARCHAR(255),
ICD_Code_title VARCHAR(255)
);
LOAD DATA LOCAL INFILE '/path/to/G_ICD.csv'
INTO TABLE G_ICD
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 ROWS
(ICD_Origin, ICD_Code, ICD_Version, ICD_Code_title);
Table: SCT_Node
CREATE TABLE MimicCEKG.H_SCT_Node (
SCT_ID INT,
SCT_Code INT,
SCT_DescriptionA_Type1 VARCHAR(255),
SCT_DescriptionA_Type2 VARCHAR(255),
SCT_DescriptionB VARCHAR(255),
SCT_Semantic_Tags VARCHAR(255),
SCT_Type VARCHAR(255),
SCT_Level VARCHAR(255)
);
LOAD DATA LOCAL INFILE '/path/to/H_SCT_Node.csv'
INTO TABLE H_SCT_Node
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 ROWS
(SCT_ID, SCT_Code, SCT_DescriptionA_Type1, SCT_DescriptionA_Type2, SCT_DescriptionB, SCT_Semantic_Tags, SCT_Type, SCT_Level);
Table: SCT_REL
CREATE TABLE MimicCEKG.H_SCT_REL (
SCT_ID_1 INT,
SCT_Code_1 INT,
SCT_ID_2 INT,
SCT_Code_2 INT
);
LOAD DATA LOCAL INFILE '/path/to/H_SCT_REL.csv'
INTO TABLE H_SCT_REL
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 ROWS
(SCT_ID_1, SCT_Code_1, SCT_ID_2, SCT_Code_2);
Table: CNM1
CREATE TABLE MimicCEKG.I_CNM1 (
Disorder_ID INT,
ICD_Code VARCHAR(255)
);
LOAD DATA LOCAL INFILE '/path/to/I_CNM1.csv'
INTO TABLE I_CNM1
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 ROWS
(Disorder_ID, ICD_Code);
Table: CNM2
CREATE TABLE MimicCEKG.J_CNM2 (
ICD_Code VARCHAR(255),
SCT_ID VARCHAR(255)
);
LOAD DATA LOCAL INFILE '/path/to/J_CNM2.csv'
INTO TABLE J_CNM2
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 ROWS
(ICD_Code, SCT_ID);
Table: CNM3
CREATE TABLE MimicCEKG.K_CNM3 (
Activity VARCHAR(255),
Activity_Synonym VARCHAR(255),
SCT_ID INT,
SCT_Code INT
);
LOAD DATA LOCAL INFILE '/path/to/K_CNM3.csv'
INTO TABLE K_CNM3
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 ROWS
(Activity, Activity_Synonym, SCT_ID, SCT_Code);
Table: CNM4_1
CREATE TABLE MimicCEKG.L_CNM4_1 (
Activity VARCHAR(255),
Activity_Synonym VARCHAR(255),
Activity_Domain VARCHAR(255)
);
LOAD DATA LOCAL INFILE '/path/to/L_CNM4_1.csv'
INTO TABLE L_CNM4_1
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 ROWS
(Activity, Activity_Synonym, Activity_Domain);
Table: CNM4_2
CREATE TABLE MimicCEKG.L_CNM4_2 (
Activity_Domain VARCHAR(255),
SCT_ID INT,
SCT_Code INT
);
LOAD DATA LOCAL INFILE '/path/to/L_CNM4_2.csv'
INTO TABLE L_CNM4_2
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 ROWS
(Activity_Domain, SCT_ID, SCT_Code);
Table: CNM5_Activity_Instance_ID
CREATE TABLE MimicCEKG.M_CNM5_Activity_Instance_ID (
Activity_Instance_ID VARCHAR(255)
);
LOAD DATA LOCAL INFILE '/path/to/M_CNM5_Activity_Instance_ID.csv'
INTO TABLE M_CNM5_Activity_Instance_ID
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 ROWS
(Activity_Instance_ID);
Table: CNM5_Activity_Instance_ID_with_Class_Part1
CREATE TABLE MimicCEKG.M_CNM5_Activity_Instance_ID_with_class_part1 (
Activity_Instance_ID VARCHAR(255),
Disorder_Name VARCHAR(255),
temp_patient_id INT,
temp_encounter_id VARCHAR(255)
);
LOAD DATA LOCAL INFILE '/path/to/M_CNM5_Activity_Instance_ID_with_class_part1.csv'
INTO TABLE M_CNM5_Activity_Instance_ID_with_class_part1
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 ROWS
(Activity_Instance_ID, Disorder_Name, temp_patient_id, temp_encounter_id);
Table: CNM5_Activity_Instance_ID_with_Class_Part2
CREATE TABLE MimicCEKG.M_CNM5_Activity_Instance_ID_with_class_part2 (
Activity_Instance_ID VARCHAR(255),
Disorder_Name VARCHAR(255),
temp_patient_id INT,
temp_encounter_id VARCHAR(255)
);
LOAD DATA LOCAL INFILE '/path/to/M_CNM5_Activity_Instance_ID_with_class_part2.csv'
INTO TABLE M_CNM5_Activity_Instance_ID_with_class_part2
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 ROWS
(Activity_Instance_ID, Disorder_Name, temp_patient_id, temp_encounter_id);
Table: CNM5_Activity_Instance_ID_with_Class_Part3
CREATE TABLE MimicCEKG.M_CNM5_Activity_Instance_ID_with_class_part3 (
Activity_Instance_ID VARCHAR(255),
Disorder_Name VARCHAR(255),
temp_patient_id INT,
temp_encounter_id VARCHAR(255)
);
LOAD DATA LOCAL INFILE '/path/to/M_CNM5_Activity_Instance_ID_with_class_part3.csv'
INTO TABLE M_CNM5_Activity_Instance_ID_with_class_part3
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 ROWS
(Activity_Instance_ID, Disorder_Name, temp_patient_id, temp_encounter_id);
Table: CNM5_Activity_Instance_ID_with_Features_Part1
CREATE TABLE MimicCEKG.M_CNM5_Activity_Instance_ID_with_features_part1 (
Activity_Instance_ID VARCHAR(255),
Activity VARCHAR(255),
Activity_Synonym VARCHAR(255),
Activity_Attribute VARCHAR(255),
Activity_Attribute_Value VARCHAR(255),
temp_patient_id INT,
temp_encounter_id VARCHAR(255),
Timestamps DATETIME
);
LOAD DATA LOCAL INFILE '/path/to/M_CNM5_Activity_Instance_ID_with_features_part1.csv'
INTO TABLE M_CNM5_Activity_Instance_ID_with_features_part1
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 ROWS
(Activity_Instance_ID, Activity, Activity_Synonym, Activity_Attribute, Activity_Attribute_Value, temp_patient_id, temp_encounter_id, Timestamps);
Table: CNM5_Activity_Instance_ID_with_Features_Part2
CREATE TABLE MimicCEKG.M_CNM5_Activity_Instance_ID_with_features_part2 (
Activity_Instance_ID VARCHAR(255),
Activity VARCHAR(255),
Activity_Synonym VARCHAR(255),
Activity_Attribute VARCHAR(255),
Activity_Attribute_Value VARCHAR(255),
temp_patient_id INT,
temp_encounter_id VARCHAR(255),
Timestamps DATETIME
);
LOAD DATA LOCAL INFILE '/path/to/M_CNM5_Activity_Instance_ID_with_features_part2.csv'
INTO TABLE M_CNM5_Activity_Instance_ID_with_features_part2
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 ROWS
(Activity_Instance_ID, Activity, Activity_Synonym, Activity_Attribute, Activity_Attribute_Value, temp_patient_id, temp_encounter_id, Timestamps);
Table: CNM5_Activity_Instance_ID_with_Features_Part3
CREATE TABLE MimicCEKG.M_CNM5_Activity_Instance_ID_with_features_part3 (
Activity_Instance_ID VARCHAR(255),
Activity VARCHAR(255),
Activity_Synonym VARCHAR(255),
Activity_Attribute VARCHAR(255),
Activity_Attribute_Value VARCHAR(255),
temp_patient_id INT,
temp_encounter_id VARCHAR(255),
Timestamps DATETIME
);
LOAD DATA LOCAL INFILE '/path/to/M_CNM5_Activity_Instance_ID_with_features_part3.csv'
INTO TABLE M_CNM5_Activity_Instance_ID_with_features_part3
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 ROWS
(Activity_Instance_ID, Activity, Activity_Synonym, Activity_Attribute, Activity_Attribute_Value, temp_patient_id, temp_encounter_id, Timestamps);
Table: CNM5_Activity_Instance_ID_with_Features_Part4
CREATE TABLE MimicCEKG.M_CNM5_Activity_Instance_ID_with_features_part4 (
Activity_Instance_ID VARCHAR(255),
Activity VARCHAR(255),
Activity_Synonym VARCHAR(255),
Activity_Attribute VARCHAR(255),
Activity_Attribute_Value VARCHAR(255),
temp_patient_id INT,
temp_encounter_id VARCHAR(255),
Timestamps DATETIME
);
LOAD DATA LOCAL INFILE '/path/to/M_CNM5_Activity_Instance_ID_with_features_part4.csv'
INTO TABLE M_CNM5_Activity_Instance_ID_with_features_part4
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 ROWS
(Activity_Instance_ID, Activity, Activity_Synonym, Activity_Attribute, Activity_Attribute_Value, temp_patient_id, temp_encounter_id, Timestamps);
Table: CNM5_class
CREATE TABLE MimicCEKG.M_CNM5_class (
Disorder_Name VARCHAR(255),
Disorder_ID INT
);
LOAD DATA LOCAL INFILE '/path/to/M_CNM5_class.csv'
INTO TABLE M_CNM5_class
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 ROWS
(Disorder_Name, Disorder_ID);
Table: ClusterReference1
CREATE TABLE MimicCEKG.N_ClusterReference1 (
temp_patient_id INT,
Morbid_num INT,
Admission_num INT,
gender INT,
anchor_age INT,
dod INT
);
LOAD DATA LOCAL INFILE '/path/to/N_ClusterReference1.csv'
INTO TABLE N_ClusterReference1
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 ROWS
(temp_patient_id, Morbid_num, Admission_num, gender, anchor_age, dod);
Table: ClusterReference2
CREATE TABLE MimicCEKG.N_ClusterReference2 (
temp_patient_id INT,
temp_encounter_id INT,
ICD10_Code VARCHAR(255),
ICD10_Code_Root VARCHAR(255),
ICD10_Code_title VARCHAR(255),
ICD10_Code_Root_title VARCHAR(255),
Morbid_num INT,
Admission_num INT
);
LOAD DATA LOCAL INFILE '/path/to/N_ClusterReference2.csv'
INTO TABLE N_ClusterReference2
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 ROWS
(temp_patient_id, temp_encounter_id, ICD10_Code, ICD10_Code_Root, ICD10_Code_title, ICD10_Code_Root_title, Morbid_num, Admission_num);