diff --git a/source/api/c_api.c b/source/api/c_api.c
index ec3edb829..5838cd067 100644
--- a/source/api/c_api.c
+++ b/source/api/c_api.c
@@ -387,6 +387,9 @@ graph_t create_graph(context_t context, const char* model_format, const char* fi
         return NULL;
     }
 
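+    // Keep the model's base file name for the NVDLA loadable cache key. NOTE(review): the pointer aliases file_name - confirm the caller's string outlives the graph.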
+    ir_graph->model_name = get_filename_ptr(file_name);
+
     ir_graph->attribute->private_context = is_new_context;
 
     if (NULL != model_format)
diff --git a/source/device/opendla/odla_executor.cc b/source/device/opendla/odla_executor.cc
index a28dee1e7..0e44df959 100644
--- a/source/device/opendla/odla_executor.cc
+++ b/source/device/opendla/odla_executor.cc
@@ -593,6 +593,78 @@ int ODLAEngine::ODLAEnginePreRun(struct subgraph* subgraph)
 #endif
     NvDlaError e = NvDlaSuccess;
     struct graph* ir_graph = subgraph->graph;
+
+    // Try to reuse a previously compiled loadable from the on-disk cache before compiling.
+    struct graph* ir_graph = subgraph->graph; 
+    std::stringstream cache_key_stream;
+    cache_key_stream << ir_graph->model_name << "_" 
+                    << this->profile->getName() << "_subgraph_" << subgraph->index
+                    << "_p" << (int)this->precision
+                    << "_b" << this->numBatches;    
+
+    std::hash<std::string> hasher;
+    std::string cache_filename = std::to_string(hasher(cache_key_stream.str())) + ".nvdla";
+    const char* cache_enable = getenv("TENGINE_ODLA_CACHE_ENABLE");
+    const char* cache_dir_env = getenv("TENGINE_ODLA_CACHE_DIR");
+    std::string cache_dir = (cache_dir_env) ? std::string(cache_dir_env) : "/tmp/tengine_cache";
+    std::string cached_file_path = cache_dir + "/" + cache_filename;
+    NvDlaFileHandle file_handle = 0;
+    printf("cache_enable = %s %d\n", cache_enable, strcmp(cache_enable, "ON"));
+    if (cache_enable && (strcmp(cache_enable, "ON") == 0) && (NvDlaFopen(cached_file_path.c_str(), NVDLA_OPEN_READ, &file_handle) == NvDlaSuccess))
+    {
+        fprintf(stdout, "[Tengine ODLA Cache] HIT. Loading from: %s\n", cached_file_path.c_str());
+        NvDlaStatType stat_info;
+        e = NvDlaFstat(file_handle, &stat_info);
+        if(e != NvDlaSuccess){
+             fprintf(stdout, "NvDlaFstat ERROR e %");
+             return -1;
+        }
+        NvU64 loadableSize = stat_info.size;
+
+        NvU8* buffer = (NvU8*)NvDlaAlloc(loadableSize);
+        NvDlaFseek(file_handle, 0, NvDlaSeek_Set);
+        if (buffer)
+        {
+            size_t bytes_read = 0;
+            e = NvDlaFread(file_handle, buffer, loadableSize, &bytes_read);
+            if(e != NvDlaSuccess || bytes_read != loadableSize){
+                fprintf(stdout, "NvDlaFread ERROR e %");
+                return -1;
+            }
+            NvDlaFclose(file_handle);
+
+            this->runtime->load(buffer, 0);
+            NvDlaFree(buffer);
+            if (subgraph->input_num > 0)
+            {
+                this->inputBuffer.reserve(subgraph->input_num);
+                for (uint8_t i = 0; i < subgraph->input_num; i++)
+                {
+                    nvdla::IRuntime::NvDlaTensor tDesc;
+                    void *hMem = NULL;
+                    this->runtime->getInputTensorDesc(i, &tDesc);
+                    this->runtime->allocateSystemMemory(&hMem, tDesc.bufferSize, &this->inputBuffer[i]);
+                    this->runtime->bindInputTensor(i, hMem);
+                }
+            }
+            if(subgraph->output_num > 0)
+            {
+                this->outputBuffer.reserve(subgraph->output_num);
+                for (uint8_t i = 0; i < subgraph->output_num; i++)
+                {
+                    nvdla::IRuntime::NvDlaTensor tDesc;
+                    void *hMem = nullptr;
+                    this->runtime->getOutputTensorDesc(i, &tDesc);
+                    this->runtime->allocateSystemMemory(&hMem, tDesc.bufferSize, &this->outputBuffer[i]);
+                    this->runtime->bindOutputTensor(i, hMem);
+                }
+            }
+            return 0;
+        }
+    }
+     // --- CACHE MISS ---
+    fprintf(stdout, "[Tengine ODLA Cache] MISS. Compiling subgraph %d...\n", subgraph->index);
+
     /* Add OpenDLA Tensor */
     for (uint8_t i = 0; i < subgraph->input_num; i++)
     {
@@ -881,6 +953,22 @@ int ODLAEngine::ODLAEnginePreRun(struct subgraph* subgraph)
         }
         this->loadable.priv()->getSerializedData(buffer);
 
+
+
+        // Write the serialized loadable to the cache directory so that later runs can skip compilation.
+        struct stat st = {0};
+        if (stat(cache_dir.c_str(), &st) == -1) {
+            mkdir(cache_dir.c_str(), 0755);
+        }
+    
+        NvDlaFileHandle cache_file_handle = 0;
+        if (cache_enable && (strcmp(cache_enable, "ON") == 0) && (NvDlaFopen(cached_file_path.c_str(), NVDLA_OPEN_WRITE, &cache_file_handle) == NvDlaSuccess))
+        {
+            NvDlaFwrite(cache_file_handle, buffer, loadableSize);
+            NvDlaFclose(cache_file_handle);
+            fprintf(stdout, "[Tengine ODLA Cache] SAVED. Stored cache to: %s\n", cached_file_path.c_str());
+        }
+
         env = getenv(OPENDLA_DUMP_LAYER);
         if (env && env[0] == '1'){
             NvDlaFileHandle file = 0;
diff --git a/source/graph/graph.h b/source/graph/graph.h
index e41e6655e..8a94002fe 100644
--- a/source/graph/graph.h
+++ b/source/graph/graph.h
@@ -68,6 +68,7 @@ typedef struct graph
     struct attribute* attribute; //<! attribute of graph
 
     struct vector* subgraph_list; //!< subgraph list of this graph
+    const char* model_name; //!< model file name (base name of the path given to create_graph)
 } ir_graph_t;
 
 /*!
diff --git a/source/utility/utils.c b/source/utility/utils.c
index 8079d1336..af73d95f5 100644
--- a/source/utility/utils.c
+++ b/source/utility/utils.c
@@ -202,3 +202,15 @@ int get_mask_index(size_t mask)
 
     return 0;
 }
+
+
+/* Return the part of path after its last '/' or '\\' (aliases path; NULL or empty is returned as-is). */
+const char* get_filename_ptr(const char* path) {
+    if (!path || !*path) return path;
+    /* Single scan instead of comparing two strrchr() results: a relational
+     * comparison that involves a NULL pointer is undefined behaviour in C. */
+    const char* name = path;
+    for (const char* p = path; *p; ++p)
+        if (*p == '/' || *p == '\\') name = p + 1;
+    return name;
+}
diff --git a/source/utility/utils.h b/source/utility/utils.h
index 23056a02b..f35dd30f3 100644
--- a/source/utility/utils.h
+++ b/source/utility/utils.h
@@ -109,6 +109,8 @@ int get_mask_count(size_t mask);
 
 int get_mask_index(size_t mask);
 
+const char* get_filename_ptr(const char* path);
+
 #ifdef __cplusplus
 }
 #endif