diff --git a/README.md b/README.md index f0681ba..7b304e8 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,11 @@ Lightweight implementations of generative label models for weakly supervised machine learning +### Installation +``` +pip3 install git+https://github.com/yongzx/labelmodels.git@top-k-probable +``` + # Example Usage - Naive Bayes Model ```python # Let votes be an m x n matrix where m is the number of data examples, n is the diff --git a/labelmodels/hmm.py b/labelmodels/hmm.py index b6b9597..eb0fd2f 100644 --- a/labelmodels/hmm.py +++ b/labelmodels/hmm.py @@ -129,7 +129,6 @@ def get_most_probable_labels(self, votes, seq_starts): seq_starts = np.array(seq_starts, dtype=np.int) out = np.ndarray((votes.shape[0],), dtype=np.int) - offset = 0 for votes, seq_starts in self._create_minibatches(votes, seq_starts, 32): jll = self._get_labeling_function_likelihoods(votes) @@ -166,6 +165,139 @@ def get_most_probable_labels(self, votes, seq_starts): offset += len(res) return out + def get_k_most_probable_labels(self, votes, seq_starts, topk, return_viterbi_scores=False): + """ + Computes the topk most probable underlying sequence nodes given function + outputs. + + Based on https://github.com/allenai/allennlp/blob/master/allennlp/nn/util.py + + :param votes: m x n matrix in {0, ..., k}, where m is the sum of + the lengths of the sequences in the batch, n is the + number of labeling functions and k is the number of + classes + :param seq_starts: vector of length l of row indices in votes indicating + the start of each sequence, where l is the number of + sequences in the batch. So, votes[seq_starts[i]] + is the row vector of labeling function outputs for the + first element in the ith sequence + :return: matrix of shape (topk, m), where element is the most likely predicted labels + """ + # Converts to CSR and integers to standardize input + votes = sparse.csr_matrix(votes, dtype=np.int) + seq_starts = np.array(seq_starts, dtype=np.int) + + out = np.ndarray((topk, votes.shape[0],), dtype=np.int32) + out_scores = np.ndarray((topk, seq_starts.shape[0],), dtype=np.float64) + final_scores = [] + + EMPTY = -1 + offset = 0 + offset_scores = 0 + for votes, seq_starts in self._create_minibatches( + votes, seq_starts, 32): + # Initializes joint log likelihood with labeling function likelihood + jll = self._get_labeling_function_likelihoods(votes) + norm_start_balance = self._get_norm_start_balance() + norm_transitions = self._get_norm_transitions() + + path_scores = [] + path_indices = [] + normalization = [] + T = votes.shape[0] + seq_ends = [x - 1 for x in seq_starts] + [votes.shape[0] - 1] + + # follow https://github.com/stanfordnlp/stanza/blob/b24d124156911f95e3c5715e9dc9f75c6076619c/stanza/models/common/crf.py#L77 + # for implementation of normalization of viterbi scores + for i in range(0, T): + if i in seq_starts: + path_scores.append((jll[i] + norm_start_balance).unsqueeze(0)) + path_indices.append(torch.zeros([self.num_classes, self.num_classes])) + + alphas = (jll[i] + norm_start_balance).unsqueeze(0) # shape: (1, self.num_classes) + else: + p = path_scores[i-1].clone().unsqueeze(2) + norm_transitions + p = p.view(-1, self.num_classes) # shape: (self.num_classes, self.num_classes) + maxk = min(p.size()[0], topk) + scores, paths = torch.topk(p, k=maxk, dim=0) # paths would use (num_tags * n_permutations) nodes + + assert scores.shape == (maxk, self.num_classes) + assert paths.shape == (maxk, self.num_classes) + scores = jll[i] + scores + + path_scores.append(scores) + 
path_indices.append(paths) + + transition_scores = alphas.unsqueeze(2) + norm_transitions # shape: (1, self.num_classes, self.num_classes) + alphas = jll[i] + torch.logsumexp(transition_scores, dim=1) + + if i in seq_ends: + log_norm = torch.logsumexp(alphas, dim=1) + normalization.append(log_norm.item()) + + res = [] + res_scores = [] + seq_ends = [x - 1 for x in seq_starts] + [votes.shape[0] - 1] + for k in range(topk): + j = T-1 + viterbi_path = [] + viterbi_score = [] + while j >= 0: + if j in seq_ends: + seq_path_scores = path_scores[j].view(-1) + skip_rest = False + if seq_path_scores.shape[0] <= k: + # print("seq_end:", j) + skip_rest = True + + viterbi_scores, best_paths = torch.topk(seq_path_scores, k=min(topk, seq_path_scores.shape[0]), dim=0) # capped at 256 because some instances are 4-token long + if skip_rest: + viterbi_path.append(EMPTY) + viterbi_score.append(EMPTY) + else: + viterbi_path.append(best_paths[k]) + viterbi_score.append(viterbi_scores[k]) + # if k == 0: + # # because viterbi_scores include scores for other k, + # # this if-condition ensures that we only need to store the viterbi_scores for + # final_scores.append(viterbi_scores.tolist() + [-1] * (topk - seq_path_scores.shape[0])) # + if j in seq_starts: + j -= 1 + continue + if skip_rest: + viterbi_path.append(EMPTY) + else: + viterbi_path.append(int(path_indices[j].view(-1)[viterbi_path[-1]])) + j -= 1 + # # if path == -1, it means that at this k, there's no viterbi path. E.g., k = 257 and we are working with 4-token sentence + # # assert False + # if -1 in viterbi_path: + # assert False + viterbi_path = [(int(path % self.num_classes) + 1) if path != -1 else -1 for path in viterbi_path] + viterbi_path.reverse() + viterbi_score.reverse() + res.append(viterbi_path) + res_scores.append(viterbi_score) + + + for k in range(topk): + for i in range(len(res[k])): + out[k][offset + i] = res[k][i] + + for k in range(topk): + for i in range(len(res_scores[k])): + if res_scores[k][i] != EMPTY: + out_scores[k][offset_scores + i] = res_scores[k][i] - normalization[i] + else: + out_scores[k][offset_scores + i] = res_scores[k][i] + + offset += len(res[0]) + offset_scores += len(res_scores[0]) + + if return_viterbi_scores: + return out, out_scores + return out + def get_label_distribution(self, votes, seq_starts): """Returns the unary and pairwise marginals over true labels estimated by the model. 
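For readers of this patch, here is a minimal usage sketch of the new `HMM.get_k_most_probable_labels` (not part of the diff itself). The toy `votes`/`seq_starts` values are assumptions; the constructor arguments mirror the HMM test changed below, and the return shapes follow the method's docstring.

```python
import numpy as np
from labelmodels import HMM

# 5 tokens from 2 sequences (tokens 0-2 and 3-4), 2 labeling functions, votes in {0, 1, 2}
votes = np.array([[1, 2], [1, 1], [2, 2], [2, 1], [1, 1]])
seq_starts = [0, 3]

model = HMM(2, 2, init_acc=0.9, acc_prior=0.0)
model.estimate_label_model(votes, seq_starts)

# paths has shape (topk, m); scores has shape (topk, num_seqs) and holds
# log-probabilities normalized by each sequence's log partition function.
# Entries are -1 wherever a sequence admits fewer than topk distinct paths.
paths, scores = model.get_k_most_probable_labels(
    votes, seq_starts, topk=4, return_viterbi_scores=True)
print(paths.shape, scores.shape)  # (4, 5) (4, 2)
```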
diff --git a/labelmodels/linked_hmm.py b/labelmodels/linked_hmm.py index 40e4ce8..fe7c0da 100644 --- a/labelmodels/linked_hmm.py +++ b/labelmodels/linked_hmm.py @@ -1,3 +1,6 @@ +from cProfile import label +from contextlib import AsyncExitStack +from os import link from .label_model import ClassConditionalLabelModel, LearningConfig, init_random import numpy as np from scipy import sparse @@ -173,7 +176,7 @@ def get_link_propensities(self): prop = self.link_propensity.detach().numpy() return np.exp(prop) / (np.exp(prop) + 1) - def get_most_probable_labels(self, label_votes, link_votes, seq_starts): + def get_most_probable_labels(self, label_votes, link_votes, seq_starts, return_viterbi_scores=False): """ Computes the most probable underlying sequence nodes given function outputs @@ -198,6 +201,7 @@ def get_most_probable_labels(self, label_votes, link_votes, seq_starts): seq_starts = np.array(seq_starts, dtype=np.int) out = np.ndarray((label_votes.shape[0],), dtype=np.int) + final_scores = [] offset = 0 for label_votes, link_votes, seq_starts in self._create_minibatches( @@ -223,9 +227,11 @@ def get_most_probable_labels(self, label_votes, link_votes, seq_starts): seq_ends = [x - 1 for x in seq_starts] + [label_votes.shape[0] - 1] res = [] j = T-1 + _scores = list() while j >= 0: if j in seq_ends: res.append(torch.argmax(jll[j, :]).item()) + _scores.append(torch.max(jll[j, :]).item()) if j in seq_starts: j -= 1 continue @@ -233,10 +239,224 @@ def get_most_probable_labels(self, label_votes, link_votes, seq_starts): j -= 1 res = [x + 1 for x in res] res.reverse() + _scores.reverse() + final_scores += _scores for i in range(len(res)): out[offset + i] = res[i] offset += len(res) + if return_viterbi_scores: + return out, np.array(final_scores) + return out + + def compute_viterbi(self, label_votes, link_votes, seq_starts, return_viterbi_scores=False): + """ + Computes the most probable underlying sequence nodes given function + outputs + + :param label_votes: m x n matrix in {0, ..., k}, where m is the sum of + the lengths of the sequences in the batch, n is the + number of labeling functions and k is the number of + classes + :param link_votes: m x n matrix in {-1, 0, 1}, where m is the sum of + the lengths of the sequences in the batch and n is the + number of linking functions + :param seq_starts: vector of length l of row indices in votes indicating + the start of each sequence, where l is the number of + sequences in the batch. 
So, label_votes[seq_starts[i]] + is the row vector of labeling function outputs for the + first element in the ith sequence + :return: vector of length m, where element is the most likely predicted labels + """ + # Converts to CSR and integers to standardize input + label_votes = sparse.csr_matrix(label_votes, dtype=np.int) + link_votes = sparse.csr_matrix(link_votes, dtype=np.int) + seq_starts = np.array(seq_starts, dtype=np.int) + + out = np.ndarray((label_votes.shape[0],), dtype=np.int) + final = [] + + offset = 0 + for label_votes, link_votes, seq_starts in self._create_minibatches( + label_votes, link_votes, seq_starts, 32): + # Initializes joint log likelihood with labeling function likelihood + jll = self._get_labeling_function_likelihoods(label_votes) # (#tokens, #classes) + link_cll = self._get_linking_function_likelihoods(link_votes) # (#tokens, #classes, #classes) + norm_start_balance = self._get_norm_start_balance() # (#classes) + norm_transitions = self._get_norm_transitions() # (#classes, #classes) + + D = {} + + T = label_votes.shape[0] + # bt = torch.zeros([T, self.num_classes]) + for i in range(0, T): + new_D = dict() + if i in seq_starts: + # unary + start balance + for label in range(jll.shape[1]): + new_D[str(label)] = jll[i][label].item() + norm_start_balance[label].item() + if D: + final.append(D) + D = {} + else: + # previous score + transition + linking + unary + for prev_seq, score in D.items(): + prev_label = int(prev_seq[-1]) + for label in range(jll.shape[1]): + new_score = score + norm_transitions[prev_label][label] + link_cll[i][prev_label][label] + jll[i][label] + new_D[f"{prev_seq}{label}"] = new_score.item() + D = new_D + if D: + final.append(D) + return final + + + def get_link_propensities(self): + """Returns the model's estimated linking function propensities, i.e., + the probability that a linking function does not abstain + :return: a NumPy array with one element in [0,1] for each linking + function, representing the estimated probability that + the corresponding linking function does not abstain + """ + prop = self.link_propensity.detach().numpy() + return np.exp(prop) / (np.exp(prop) + 1) + + def get_k_most_probable_labels(self, label_votes, link_votes, seq_starts, topk, return_viterbi_scores=False): + """ + Computes the topk most probable underlying sequence nodes given function + outputs. + + Based on https://github.com/allenai/allennlp/blob/master/allennlp/nn/util.py + + :param label_votes: m x n matrix in {0, ..., k}, where m is the sum of + the lengths of the sequences in the batch, n is the + number of labeling functions and k is the number of + classes + :param link_votes: m x n matrix in {-1, 0, 1}, where m is the sum of + the lengths of the sequences in the batch and n is the + number of linking functions + :param seq_starts: vector of length l of row indices in votes indicating + the start of each sequence, where l is the number of + sequences in the batch. 
So, label_votes[seq_starts[i]] + is the row vector of labeling function outputs for the + first element in the ith sequence + :return: matrix of shape (topk, m), where element is the most likely predicted labels + """ + # Converts to CSR and integers to standardize input + label_votes = sparse.csr_matrix(label_votes, dtype=np.int32) + link_votes = sparse.csr_matrix(link_votes, dtype=np.int) + seq_starts = np.array(seq_starts, dtype=np.int) + + out = np.ndarray((topk, label_votes.shape[0],), dtype=np.int32) + out_scores = np.ndarray((topk, seq_starts.shape[0],), dtype=np.float64) + final_scores = [] + + offset = 0 + offset_scores = 0 + for label_votes, link_votes, seq_starts in self._create_minibatches( + label_votes, link_votes, seq_starts, 32): + # Initializes joint log likelihood with labeling function likelihood + jll = self._get_labeling_function_likelihoods(label_votes) + link_cll = self._get_linking_function_likelihoods(link_votes) + norm_start_balance = self._get_norm_start_balance() + norm_transitions = self._get_norm_transitions() + + path_scores = [] + path_indices = [] + T = label_votes.shape[0] + normalization = [] + seq_ends = [x - 1 for x in seq_starts] + [label_votes.shape[0] - 1] + + for i in range(0, T): + if i in seq_starts: + path_scores.append((jll[i] + norm_start_balance).unsqueeze(0)) + path_indices.append(torch.zeros([self.num_classes, self.num_classes])) + + alphas = (jll[i] + norm_start_balance).unsqueeze(0) # shape: (1, self.num_classes) + else: + p = path_scores[i-1].clone().unsqueeze(2) + norm_transitions + p += link_cll[i] + p = p.view(-1, self.num_classes) # shape: (self.num_classes, self.num_classes) + + maxk = min(p.size()[0], topk) + scores, paths = torch.topk(p, k=maxk, dim=0) # paths would use (num_tags * n_permutations) nodes + + assert scores.shape == (maxk, self.num_classes) + assert paths.shape == (maxk, self.num_classes) + scores = jll[i] + scores + + path_scores.append(scores) + path_indices.append(paths) + + transition_scores = alphas.unsqueeze(2) + norm_transitions + link_cll[i] # shape: (1, self.num_classes, self.num_classes) + alphas = jll[i] + torch.logsumexp(transition_scores, dim=1) # shape: (1, self.num_classes) + + if i in seq_ends: + log_norm = torch.logsumexp(alphas, dim=1) + normalization.append(log_norm.item()) + + res = [] + res_scores = [] + seq_ends = [x - 1 for x in seq_starts] + [label_votes.shape[0] - 1] + for k in range(topk): + j = T-1 + viterbi_path = [] + viterbi_score = [] + while j >= 0: + if j in seq_ends: + seq_path_scores = path_scores[j].view(-1) + skip_rest = False + if seq_path_scores.shape[0] <= k: + # print("seq_end:", j) + skip_rest = True + + viterbi_scores, best_paths = torch.topk(seq_path_scores, k=min(topk, seq_path_scores.shape[0]), dim=0) # capped at 256 because some instances are 4-token long + if skip_rest: + viterbi_path.append(-1) + viterbi_score.append(-1) + else: + viterbi_path.append(best_paths[k]) + viterbi_score.append(viterbi_scores[k]) + # if k == 0: + # # because viterbi_scores include scores for other k, + # # this if-condition ensures that we only need to store the viterbi_scores for + # final_scores.append(viterbi_scores.tolist() + [-1] * (topk - seq_path_scores.shape[0])) # + if j in seq_starts: + j -= 1 + continue + if skip_rest: + viterbi_path.append(-1) + else: + viterbi_path.append(int(path_indices[j].view(-1)[viterbi_path[-1]])) + j -= 1 + # # if path == -1, it means that at this k, there's no viterbi path. 
E.g., k = 257 and we are working with 4-token sentence + # # assert False + # if -1 in viterbi_path: + # assert False + viterbi_path = [(int(path % self.num_classes) + 1) if path != -1 else -1 for path in viterbi_path] + viterbi_path.reverse() + viterbi_score.reverse() + res.append(viterbi_path) + res_scores.append(viterbi_score) + + + for k in range(topk): + for i in range(len(res[k])): + out[k][offset + i] = res[k][i] + + for k in range(topk): + for i in range(len(res_scores[k])): + if res_scores[k][i] != -1: + out_scores[k][offset_scores + i] = res_scores[k][i] - normalization[i] + # out_scores[k][offset_scores + i] = res_scores[k][i] + else: + out_scores[k][offset_scores + i] = res_scores[k][i] + + offset += len(res[0]) + offset_scores += len(res_scores[0]) + + if return_viterbi_scores: + return out, out_scores return out def get_label_distribution(self, label_votes, link_votes, seq_starts): @@ -331,7 +551,7 @@ def get_label_distribution(self, label_votes, link_votes, seq_starts): p_pairwise[i] -= denom out_pairwise[offset + i, :, :] = torch.exp(p_pairwise[i]).detach() - + offset += label_votes.shape[0] return out_unary, out_pairwise diff --git a/test/test_hmm.py b/test/test_hmm.py index bcba655..0c607bb 100644 --- a/test/test_hmm.py +++ b/test/test_hmm.py @@ -1,3 +1,7 @@ +import sys +mypath = "/Users/zhengxinyong/Desktop/labelmodels" +sys.path.append(mypath) + from labelmodels import HMM import numpy as np from scipy import sparse @@ -13,163 +17,207 @@ def setUp(self): def tearDown(self): pass - def test_estimate_label_model_binary(self): - n = 5 + # def test_estimate_label_model_binary(self): + # n = 5 + # k = 2 + + # accuracies = np.array([[.9, .8], + # [.6, .7], + # [.6, .6], + # [.7, .6], + # [.8, .8]]) + # propensities = np.array([.9] * n) + # start_balance = np.array([.3, .7]) + # transitions = np.array([[.5, .5], [.3, .7]]) + + # labels_train, seq_starts_train, gold_train = _generate_data( + # 1000, 8, 12, n, accuracies, propensities, start_balance, transitions + # ) + + # model = HMM(k, n, acc_prior=0.0, balance_prior=0.0) + # model.estimate_label_model(labels_train, seq_starts_train) + + # for i in range(n): + # for j in range(k): + # diff = accuracies[i, j] - model.get_accuracies()[i, j] + # self.assertAlmostEqual(diff, 0.0, places=1) + # for i in range(n): + # diff = propensities[i] - model.get_propensities()[i] + # self.assertAlmostEqual(diff, 0.0, places=1) + # for i in range(k): + # diff = start_balance[i] - model.get_start_balance()[i] + # self.assertAlmostEqual(diff, 0.0, places=1) + # for i in range(k): + # for j in range(k): + # diff = transitions[i, j] - model.get_transition_matrix()[i, j] + # self.assertAlmostEqual(diff, 0.0, places=1) + + # def test_estimate_label_model_multiclass(self): + # n = 5 + # k = 3 + + # accuracies = np.array([[.9, .8, .9], + # [.6, .7, .9], + # [.6, .6, .9], + # [.7, .6, .9], + # [.8, .8, .9]]) + # propensities = np.array([.9] * n) + # start_balance = np.array([.3, .3, .4]) + # transitions = np.array([[.5, .3, .2], + # [.3, .4, .3], + # [.2, .5, .3]]) + + # labels_train, seq_starts_train, gold_train = _generate_data( + # 1000, 8, 12, n, accuracies, propensities, start_balance, transitions + # ) + + # model = HMM(k, n, acc_prior=0.0, balance_prior=0.0) + # model.estimate_label_model(labels_train, seq_starts_train) + + # for i in range(n): + # for j in range(k): + # diff = accuracies[i, j] - model.get_accuracies()[i, j] + # self.assertAlmostEqual(diff, 0.0, places=1) + # for i in range(n): + # diff = propensities[i] - 
model.get_propensities()[i] + # self.assertAlmostEqual(diff, 0.0, places=1) + # for i in range(k): + # diff = start_balance[i] - model.get_start_balance()[i] + # self.assertAlmostEqual(diff, 0.0, places=1) + # for i in range(k): + # for j in range(k): + # diff = transitions[i, j] - model.get_transition_matrix()[i, j] + # self.assertAlmostEqual(diff, 0.0, places=1) + + # def test_get_most_probable_labels(self): + # m = 500 + # n = 10 + # k = 3 + + # model = HMM(k, n, acc_prior=0.0) + # with torch.no_grad(): + # model.start_balance[0] = 0 + # model.start_balance[1] = 0.5 + # for i in range(n): + # model.propensity[i] = 2 + # for j in range(k): + # model.accuracy[i, j] = 2 + # for i in range(k): + # for j in range(k): + # model.transitions[i, j] = 1 if i == j else 0 + + # labels_train, seq_starts_train, gold_train = _generate_data( + # m, 8, 12, n, + # model.get_accuracies(), + # model.get_propensities(), + # model.get_start_balance(), + # model.get_transition_matrix()) + + # predictions = model.get_most_probable_labels(labels_train, seq_starts_train) + # correct = 0 + # for i in range(len(predictions)): + # if predictions[i] == gold_train[i]: + # correct += 1 + # accuracy = correct / float(len(predictions)) + # self.assertGreaterEqual(accuracy, .95) + + def test_get_k_most_probable_labels(self): + n = 3 k = 2 - accuracies = np.array([[.9, .8], - [.6, .7], - [.6, .6], - [.7, .6], - [.8, .8]]) - propensities = np.array([.9] * n) - start_balance = np.array([.3, .7]) - transitions = np.array([[.5, .5], [.3, .7]]) - - labels_train, seq_starts_train, gold_train = _generate_data( - 1000, 8, 12, n, accuracies, propensities, start_balance, transitions - ) - - model = HMM(k, n, acc_prior=0.0, balance_prior=0.0) - model.estimate_label_model(labels_train, seq_starts_train) - - for i in range(n): - for j in range(k): - diff = accuracies[i, j] - model.get_accuracies()[i, j] - self.assertAlmostEqual(diff, 0.0, places=1) - for i in range(n): - diff = propensities[i] - model.get_propensities()[i] - self.assertAlmostEqual(diff, 0.0, places=1) - for i in range(k): - diff = start_balance[i] - model.get_start_balance()[i] - self.assertAlmostEqual(diff, 0.0, places=1) - for i in range(k): - for j in range(k): - diff = transitions[i, j] - model.get_transition_matrix()[i, j] - self.assertAlmostEqual(diff, 0.0, places=1) - - def test_estimate_label_model_multiclass(self): - n = 5 - k = 3 - - accuracies = np.array([[.9, .8, .9], - [.6, .7, .9], - [.6, .6, .9], - [.7, .6, .9], - [.8, .8, .9]]) - propensities = np.array([.9] * n) - start_balance = np.array([.3, .3, .4]) - transitions = np.array([[.5, .3, .2], - [.3, .4, .3], - [.2, .5, .3]]) - - labels_train, seq_starts_train, gold_train = _generate_data( - 1000, 8, 12, n, accuracies, propensities, start_balance, transitions - ) - - model = HMM(k, n, acc_prior=0.0, balance_prior=0.0) + model = HMM(k, n, init_acc=0.9, acc_prior=0.0) + labels_train = [[2, 0, 2], [1, 2, 2], [1, 0, 1], [1, 0, 1], [1, 0, 1], [1, 0, 1], [1, 0, 2], [1, 0, 1], [1, 0, 1], [1, 0, 2]] + seq_starts_train = [0, 2, 5, 8] model.estimate_label_model(labels_train, seq_starts_train) + print(model.get_accuracies()) - for i in range(n): - for j in range(k): - diff = accuracies[i, j] - model.get_accuracies()[i, j] - self.assertAlmostEqual(diff, 0.0, places=1) - for i in range(n): - diff = propensities[i] - model.get_propensities()[i] - self.assertAlmostEqual(diff, 0.0, places=1) - for i in range(k): - diff = start_balance[i] - model.get_start_balance()[i] - self.assertAlmostEqual(diff, 0.0, places=1) - for 
i in range(k): - for j in range(k): - diff = transitions[i, j] - model.get_transition_matrix()[i, j] - self.assertAlmostEqual(diff, 0.0, places=1) - - def test_get_most_probable_labels(self): - m = 500 - n = 10 - k = 3 - - model = HMM(k, n, acc_prior=0.0) - with torch.no_grad(): - model.start_balance[0] = 0 - model.start_balance[1] = 0.5 - for i in range(n): - model.propensity[i] = 2 - for j in range(k): - model.accuracy[i, j] = 2 - for i in range(k): - for j in range(k): - model.transitions[i, j] = 1 if i == j else 0 - - labels_train, seq_starts_train, gold_train = _generate_data( - m, 8, 12, n, - model.get_accuracies(), - model.get_propensities(), - model.get_start_balance(), - model.get_transition_matrix()) + predictions, scores = model.get_k_most_probable_labels(labels_train, seq_starts_train, 8, True) + print(predictions) + print(scores) + print(np.sum(np.exp(np.ma.masked_values(scores, -1)), 0)) predictions = model.get_most_probable_labels(labels_train, seq_starts_train) - correct = 0 - for i in range(len(predictions)): - if predictions[i] == gold_train[i]: - correct += 1 - accuracy = correct / float(len(predictions)) - self.assertGreaterEqual(accuracy, .95) - - def test_get_label_distribution(self): - m = 500 - n = 10 - k = 3 - - model = HMM(k, n, acc_prior=0.0) - with torch.no_grad(): - model.start_balance[0] = 0 - model.start_balance[1] = 0.5 - for i in range(n): - model.propensity[i] = 2 - for j in range(k): - model.accuracy[i, j] = 2 - for i in range(k): - for j in range(k): - model.transitions[i, j] = 1 if i == j else 0 - - labels_train, seq_starts_train, gold_train = _generate_data( - m, 8, 12, n, - model.get_accuracies(), - model.get_propensities(), - model.get_start_balance(), - model.get_transition_matrix()) - - p_unary, p_pairwise = model.get_label_distribution( - labels_train, seq_starts_train) - - # Makes predictions using both unary and pairwise marginals - pred_unary = np.argmax(p_unary, axis=1) + 1 - pred_pairwise = np.zeros((labels_train.shape[0],), dtype=np.int) - next_seq = 0 - for i in range(labels_train.shape[0] - 1): - if next_seq == len(seq_starts_train) or i < seq_starts_train[next_seq] - 1: - # i is neither the start nor end of a sequence - pred_pairwise[i+1] = np.argmax(p_pairwise[i][pred_pairwise[i]]) - elif i == seq_starts_train[next_seq]: - # i is the start of a sequence - a, b = np.unravel_index(p_pairwise[i].argmax(), (k, k)) - pred_pairwise[i], pred_pairwise[i + 1] = a, b - next_seq += 1 - else: - # i is the end of a sequence - pass - pred_pairwise += 1 - - # Checks that predictions are accurate - for predictions in (pred_unary, pred_pairwise): - correct = 0 - for i in range(len(predictions)): - if predictions[i] == gold_train[i]: - correct += 1 - accuracy = correct / float(len(predictions)) - self.assertGreaterEqual(accuracy, .95) + print(predictions) + assert False + # with torch.no_grad(): + # model.start_balance[0] = 0 + # model.start_balance[1] = 0.5 + # for i in range(n): + # model.propensity[i] = 2 + # for j in range(k): + # model.accuracy[i, j] = 2 + # for i in range(k): + # for j in range(k): + # model.transitions[i, j] = 1 if i == j else 0 + + # labels_train, seq_starts_train, gold_train = _generate_data( + # m, 8, 12, n, + # model.get_accuracies(), + # model.get_propensities(), + # model.get_start_balance(), + # model.get_transition_matrix()) + + # predictions = model.get_most_probable_labels(labels_train, seq_starts_train) + # correct = 0 + # for i in range(len(predictions)): + # if predictions[i] == gold_train[i]: + # correct += 1 + # 
accuracy = correct / float(len(predictions)) + # self.assertGreaterEqual(accuracy, .95) + + # def test_get_label_distribution(self): + # m = 500 + # n = 10 + # k = 3 + + # model = HMM(k, n, acc_prior=0.0) + # with torch.no_grad(): + # model.start_balance[0] = 0 + # model.start_balance[1] = 0.5 + # for i in range(n): + # model.propensity[i] = 2 + # for j in range(k): + # model.accuracy[i, j] = 2 + # for i in range(k): + # for j in range(k): + # model.transitions[i, j] = 1 if i == j else 0 + + # labels_train, seq_starts_train, gold_train = _generate_data( + # m, 8, 12, n, + # model.get_accuracies(), + # model.get_propensities(), + # model.get_start_balance(), + # model.get_transition_matrix()) + + # p_unary, p_pairwise = model.get_label_distribution( + # labels_train, seq_starts_train) + + # # Makes predictions using both unary and pairwise marginals + # pred_unary = np.argmax(p_unary, axis=1) + 1 + # pred_pairwise = np.zeros((labels_train.shape[0],), dtype=np.int) + # next_seq = 0 + # for i in range(labels_train.shape[0] - 1): + # if next_seq == len(seq_starts_train) or i < seq_starts_train[next_seq] - 1: + # # i is neither the start nor end of a sequence + # pred_pairwise[i+1] = np.argmax(p_pairwise[i][pred_pairwise[i]]) + # elif i == seq_starts_train[next_seq]: + # # i is the start of a sequence + # a, b = np.unravel_index(p_pairwise[i].argmax(), (k, k)) + # pred_pairwise[i], pred_pairwise[i + 1] = a, b + # next_seq += 1 + # else: + # # i is the end of a sequence + # pass + # pred_pairwise += 1 + + # # Checks that predictions are accurate + # for predictions in (pred_unary, pred_pairwise): + # correct = 0 + # for i in range(len(predictions)): + # if predictions[i] == gold_train[i]: + # correct += 1 + # accuracy = correct / float(len(predictions)) + # self.assertGreaterEqual(accuracy, .95) def _generate_data(num_seqs, min_seq, max_seq, num_lfs, accuracies, diff --git a/test/test_linked_hmm.py b/test/test_linked_hmm.py index fec357a..77469a7 100644 --- a/test/test_linked_hmm.py +++ b/test/test_linked_hmm.py @@ -1,10 +1,13 @@ +import sys +mypath = "/Users/zhengxinyong/Desktop/labelmodels" +sys.path.append(mypath) + from labelmodels import LinkedHMM, LearningConfig import numpy as np from scipy import sparse import torch import unittest - class TestLinkedHMM(unittest.TestCase): def setUp(self): @@ -13,218 +16,492 @@ def setUp(self): def tearDown(self): pass - def test_estimate_label_model_binary(self): - n1 = 5 - n2 = 3 - k = 2 - - label_accuracies = np.array([[.9, .8], - [.6, .7], - [.6, .6], - [.7, .6], - [.8, .8]]) - link_accuracies = np.array([.8, .6, .8]) - label_propensities = np.array([.9] * n1) - link_propensities = np.array([.9] * n1) - start_balance = np.array([.3, .7]) - transitions = np.array([[.5, .5], [.3, .7]]) - - labels, links, seq_starts, gold = _generate_data( - 1000, 8, 12, n1, n2, - label_accuracies, - link_accuracies, - label_propensities, - link_propensities, - start_balance, - transitions - ) - - model = LinkedHMM(k, n1, n2, acc_prior=0.0, balance_prior=0.0) - config = LearningConfig() - config.epochs = 3 - model.estimate_label_model(labels, links, seq_starts, config=config) - - for i in range(n1): - for j in range(k): - diff = label_accuracies[i, j] - model.get_accuracies()[i, j] - self.assertAlmostEqual(diff, 0.0, places=1) - for i in range(n2): - for j in range(k): - diff = link_accuracies[i] - model.get_link_accuracies()[i] - self.assertAlmostEqual(diff, 0.0, places=1) - for i in range(n1): - diff = label_propensities[i] - model.get_propensities()[i] - 
self.assertAlmostEqual(diff, 0.0, places=1) - for i in range(n2): - diff = link_propensities[i] - model.get_link_propensities()[i] - self.assertAlmostEqual(diff, 0.0, places=1) - for i in range(k): - diff = start_balance[i] - model.get_start_balance()[i] - self.assertAlmostEqual(diff, 0.0, places=1) - for i in range(k): - for j in range(k): - diff = transitions[i, j] - model.get_transition_matrix()[i, j] - self.assertAlmostEqual(diff, 0.0, places=1) - - def test_estimate_label_model_multiclass(self): - n1 = 5 - n2 = 3 - k = 3 - - label_accuracies = np.array([[.9, .8, .5], - [.6, .7, .3], - [.6, .6, .8], - [.7, .6, .6], - [.8, .8, .9]]) - link_accuracies = np.array([.8, .6, .8]) - label_propensities = np.array([.9] * n1) - link_propensities = np.array([.9] * n1) - start_balance = np.array([.3, .3, .4]) - transitions = np.array([[.5, .3, .2], - [.4, .3, .3], - [.3, .3, .4]]) - - labels, links, seq_starts, gold = _generate_data( - 1000, 8, 12, n1, n2, - label_accuracies, - link_accuracies, - label_propensities, - link_propensities, - start_balance, - transitions - ) - - model = LinkedHMM(k, n1, n2, acc_prior=0.0, balance_prior=0.0) - config = LearningConfig() - config.epochs = 4 - model.estimate_label_model(labels, links, seq_starts, config=config) - - for i in range(n1): - for j in range(k): - diff = label_accuracies[i, j] - model.get_accuracies()[i, j] - self.assertAlmostEqual(diff, 0.0, places=1) - for i in range(n2): - for j in range(k): - diff = link_accuracies[i] - model.get_link_accuracies()[i] - self.assertAlmostEqual(diff, 0.0, places=1) - for i in range(n1): - diff = label_propensities[i] - model.get_propensities()[i] - self.assertAlmostEqual(diff, 0.0, places=1) - for i in range(n2): - diff = link_propensities[i] - model.get_link_propensities()[i] - self.assertAlmostEqual(diff, 0.0, places=1) - for i in range(k): - diff = start_balance[i] - model.get_start_balance()[i] - self.assertAlmostEqual(diff, 0.0, places=1) - for i in range(k): - for j in range(k): - diff = transitions[i, j] - model.get_transition_matrix()[i, j] - self.assertAlmostEqual(diff, 0.0, places=1) - - def test_get_most_probable_labels(self): - m = 500 - n1 = 3 - n2 = 5 - k = 3 - - model = LinkedHMM(k, n1, n2) - with torch.no_grad(): - model.start_balance[0] = 0 - model.start_balance[1] = 0.5 - for i in range(n1): - model.propensity[i] = 0 - for j in range(k): - model.accuracy[i, j] = 1 - for i in range(n2): - model.link_propensity[i] = 0 - model.link_accuracy[i] = 1.5 - for i in range(k): - for j in range(k): - model.transitions[i, j] = 1 if i == j else 0 - - labels, links, seq_starts, gold = _generate_data( - m, 8, 12, n1, n2, - model.get_label_accuracies(), - model.get_link_accuracies(), - model.get_label_propensities(), - model.get_link_propensities(), - model.get_start_balance(), - model.get_transition_matrix()) - - predictions = model.get_most_probable_labels(labels, links, seq_starts) - correct = 0 - for i in range(len(predictions)): - if predictions[i] == gold[i]: - correct += 1 - accuracy = correct / float(len(predictions)) - self.assertGreaterEqual(accuracy, .95) - - def test_get_label_distribution(self): - m = 500 - n1 = 3 - n2 = 5 - k = 3 - - model = LinkedHMM(k, n1, n2) - with torch.no_grad(): - model.start_balance[0] = 0 - model.start_balance[1] = 0.5 - for i in range(n1): - model.propensity[i] = 0 - for j in range(k): - model.accuracy[i, j] = 1 - for i in range(n2): - model.link_propensity[i] = 0 - model.link_accuracy[i] = 1.5 - for i in range(k): - for j in range(k): - model.transitions[i, j] = 1 if 
i == j else 0 - - labels, links, seq_starts, gold = _generate_data( - m, 8, 12, n1, n2, - model.get_label_accuracies(), - model.get_link_accuracies(), - model.get_label_propensities(), - model.get_link_propensities(), - model.get_start_balance(), - model.get_transition_matrix()) - - p_unary, p_pairwise = model.get_label_distribution( - labels, links, seq_starts) - - # Makes predictions using both unary and pairwise marginals - pred_unary = np.argmax(p_unary, axis=1) + 1 - pred_pairwise = np.zeros((labels.shape[0],), dtype=np.int) - next_seq = 0 - for i in range(labels.shape[0] - 1): - if next_seq == len(seq_starts) or i < seq_starts[next_seq] - 1: - # i is neither the start nor end of a sequence - pred_pairwise[i+1] = np.argmax(p_pairwise[i][pred_pairwise[i]]) - elif i == seq_starts[next_seq]: - # i is the start of a sequence - a, b = np.unravel_index(p_pairwise[i].argmax(), (k, k)) - pred_pairwise[i], pred_pairwise[i + 1] = a, b - next_seq += 1 - else: - # i is the end of a sequence - pass - pred_pairwise += 1 - - # Checks that predictions are accurate - for predictions in (pred_unary, pred_pairwise): - correct = 0 - for i in range(len(predictions)): - if predictions[i] == gold[i]: - correct += 1 - accuracy = correct / float(len(predictions)) - self.assertGreaterEqual(accuracy, .95) + # def test_estimate_label_model_binary(self): + # n1 = 5 + # n2 = 3 + # k = 2 + + # label_accuracies = np.array([[.9, .8], + # [.6, .7], + # [.6, .6], + # [.7, .6], + # [.8, .8]]) + # link_accuracies = np.array([.8, .6, .8]) + # label_propensities = np.array([.9] * n1) + # link_propensities = np.array([.9] * n1) + # start_balance = np.array([.3, .7]) + # transitions = np.array([[.5, .5], [.3, .7]]) + + # labels, links, seq_starts, gold = _generate_data( + # 1000, 8, 12, n1, n2, + # label_accuracies, + # link_accuracies, + # label_propensities, + # link_propensities, + # start_balance, + # transitions + # ) + + # model = LinkedHMM(k, n1, n2, acc_prior=0.0, balance_prior=0.0) + # config = LearningConfig() + # config.epochs = 3 + # model.estimate_label_model(labels, links, seq_starts, config=config) + + # for i in range(n1): + # for j in range(k): + # diff = label_accuracies[i, j] - model.get_accuracies()[i, j] + # self.assertAlmostEqual(diff, 0.0, places=1) + # for i in range(n2): + # for j in range(k): + # diff = link_accuracies[i] - model.get_link_accuracies()[i] + # self.assertAlmostEqual(diff, 0.0, places=1) + # for i in range(n1): + # diff = label_propensities[i] - model.get_propensities()[i] + # self.assertAlmostEqual(diff, 0.0, places=1) + # for i in range(n2): + # diff = link_propensities[i] - model.get_link_propensities()[i] + # self.assertAlmostEqual(diff, 0.0, places=1) + # for i in range(k): + # diff = start_balance[i] - model.get_start_balance()[i] + # self.assertAlmostEqual(diff, 0.0, places=1) + # for i in range(k): + # for j in range(k): + # diff = transitions[i, j] - model.get_transition_matrix()[i, j] + # self.assertAlmostEqual(diff, 0.0, places=1) + + # def test_estimate_label_model_multiclass(self): + # n1 = 5 + # n2 = 3 + # k = 3 + + # label_accuracies = np.array([[.9, .8, .5], + # [.6, .7, .3], + # [.6, .6, .8], + # [.7, .6, .6], + # [.8, .8, .9]]) + # link_accuracies = np.array([.8, .6, .8]) + # label_propensities = np.array([.9] * n1) + # link_propensities = np.array([.9] * n1) + # start_balance = np.array([.3, .3, .4]) + # transitions = np.array([[.5, .3, .2], + # [.4, .3, .3], + # [.3, .3, .4]]) + + # labels, links, seq_starts, gold = _generate_data( + # 1000, 8, 12, n1, n2, + # 
label_accuracies, + # link_accuracies, + # label_propensities, + # link_propensities, + # start_balance, + # transitions + # ) + + # model = LinkedHMM(k, n1, n2, acc_prior=0.0, balance_prior=0.0) + # config = LearningConfig() + # config.epochs = 4 + # model.estimate_label_model(labels, links, seq_starts, config=config) + + # for i in range(n1): + # for j in range(k): + # diff = label_accuracies[i, j] - model.get_accuracies()[i, j] + # self.assertAlmostEqual(diff, 0.0, places=1) + # for i in range(n2): + # for j in range(k): + # diff = link_accuracies[i] - model.get_link_accuracies()[i] + # self.assertAlmostEqual(diff, 0.0, places=1) + # for i in range(n1): + # diff = label_propensities[i] - model.get_propensities()[i] + # self.assertAlmostEqual(diff, 0.0, places=1) + # for i in range(n2): + # diff = link_propensities[i] - model.get_link_propensities()[i] + # self.assertAlmostEqual(diff, 0.0, places=1) + # for i in range(k): + # diff = start_balance[i] - model.get_start_balance()[i] + # self.assertAlmostEqual(diff, 0.0, places=1) + # for i in range(k): + # for j in range(k): + # diff = transitions[i, j] - model.get_transition_matrix()[i, j] + # self.assertAlmostEqual(diff, 0.0, places=1) + + # def test_get_most_probable_labels(self): + # m = 500 + # n1 = 3 + # n2 = 5 + # k = 3 + + # model = LinkedHMM(k, n1, n2) + # with torch.no_grad(): + # model.start_balance[0] = 0 + # model.start_balance[1] = 0.5 + # for i in range(n1): + # model.propensity[i] = 0 + # for j in range(k): + # model.accuracy[i, j] = 1 + # for i in range(n2): + # model.link_propensity[i] = 0 + # model.link_accuracy[i] = 1.5 + # for i in range(k): + # for j in range(k): + # model.transitions[i, j] = 1 if i == j else 0 + + # labels, links, seq_starts, gold = _generate_data( + # m, 8, 12, n1, n2, + # model.get_label_accuracies(), + # model.get_link_accuracies(), + # model.get_label_propensities(), + # model.get_link_propensities(), + # model.get_start_balance(), + # model.get_transition_matrix()) + + # predictions = model.get_most_probable_labels(labels, links, seq_starts) + # correct = 0 + # for i in range(len(predictions)): + # if predictions[i] == gold[i]: + # correct += 1 + # accuracy = correct / float(len(predictions)) + # self.assertGreaterEqual(accuracy, .95) + + # def test_get_label_distribution(self): + # m = 500 + # n1 = 3 + # n2 = 5 + # k = 3 + + # model = LinkedHMM(k, n1, n2) + # with torch.no_grad(): + # model.start_balance[0] = 0 + # model.start_balance[1] = 0.5 + # for i in range(n1): + # model.propensity[i] = 0 + # for j in range(k): + # model.accuracy[i, j] = 1 + # for i in range(n2): + # model.link_propensity[i] = 0 + # model.link_accuracy[i] = 1.5 + # for i in range(k): + # for j in range(k): + # model.transitions[i, j] = 1 if i == j else 0 + + # labels, links, seq_starts, gold = _generate_data( + # m, 8, 12, n1, n2, + # model.get_label_accuracies(), + # model.get_link_accuracies(), + # model.get_label_propensities(), + # model.get_link_propensities(), + # model.get_start_balance(), + # model.get_transition_matrix()) + + # p_unary, p_pairwise = model.get_label_distribution( + # labels, links, seq_starts) + + # # Makes predictions using both unary and pairwise marginals + # pred_unary = np.argmax(p_unary, axis=1) + 1 + # pred_pairwise = np.zeros((labels.shape[0],), dtype=np.int) + # next_seq = 0 + # for i in range(labels.shape[0] - 1): + # if next_seq == len(seq_starts) or i < seq_starts[next_seq] - 1: + # # i is neither the start nor end of a sequence + # pred_pairwise[i+1] = 
np.argmax(p_pairwise[i][pred_pairwise[i]]) + # elif i == seq_starts[next_seq]: + # # i is the start of a sequence + # a, b = np.unravel_index(p_pairwise[i].argmax(), (k, k)) + # pred_pairwise[i], pred_pairwise[i + 1] = a, b + # next_seq += 1 + # else: + # # i is the end of a sequence + # pass + # pred_pairwise += 1 + + # # Checks that predictions are accurate + # for predictions in (pred_unary, pred_pairwise): + # correct = 0 + # for i in range(len(predictions)): + # if predictions[i] == gold[i]: + # correct += 1 + # accuracy = correct / float(len(predictions)) + # self.assertGreaterEqual(accuracy, .95) + + def test_get_k_most_probable_labels(self): + m = 34 # num_seqs + n1 = 4 # num_labeling_funcs + n2 = 5 # num_linking_funcs + k = 4 # num_classes + + model = LinkedHMM(k, n1, n2) + with torch.no_grad(): + model.start_balance[0] = 0 + model.start_balance[1] = 0.5 + for i in range(n1): + model.propensity[i] = 0 + for j in range(k): + model.accuracy[i, j] = 1 + for i in range(n2): + model.link_propensity[i] = 0 + model.link_accuracy[i] = 1.5 + for i in range(k): + for j in range(k): + model.transitions[i, j] = 1 if i == j else 0 + + labels, links, seq_starts, gold = _generate_data( + m, 7, 7, n1, n2, + model.get_label_accuracies(), + model.get_link_accuracies(), + model.get_label_propensities(), + model.get_link_propensities(), + model.get_start_balance(), + model.get_transition_matrix()) + + # model.estimate_label_model(labels, links, seq_starts) + # # assert that when topk = 1, the output of get_k most_probable_labels is the same as get_most_probable_labels + # # for the fact that torch.argmax (used in get_most_probable_labels) == torch.topk (used in get_k_most_probable_labels) + # # when topk = 1. + # predictions = model.get_most_probable_labels(labels, links, seq_starts) + # k_predictions = model.get_k_most_probable_labels(labels, links, seq_starts, topk=1) + # self.assertIsNone(np.testing.assert_array_equal(k_predictions[0], predictions)) + + # # assert that when topk > 1, the viterbi_scores of the first sequence from get_k most_probable_labels + # # is the same as get_most_probable_labels + # viterbi_scores = model.get_most_probable_labels(labels, links, seq_starts, return_viterbi_scores=True) + # k_viterbi_scores = model.get_k_most_probable_labels(labels, links, seq_starts, topk=9, return_viterbi_scores=True) + # self.assertIsNone(np.testing.assert_array_equal(k_viterbi_scores[:, 0], viterbi_scores)) + + # # assert that when topk > 1, all sequences from get_k most_probable_labels are different from one another + # k_predictions = model.get_k_most_probable_labels(labels, links, seq_starts, topk=3) + # self.assertEqual(np.unique(k_predictions, axis=0).shape[0], k_predictions.shape[0]) + + # # assert that when topk > 1, the viterbi scores are in a non-increasing order. 
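+        # The commented-out assertions above document the intended invariants of
+        # get_k_most_probable_labels: with topk=1 its path (and score) should match
+        # get_most_probable_labels, the topk returned paths should be pairwise
+        # distinct, and their normalized Viterbi scores should be non-increasing in k.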
+ + ### 👀 Ontonotes + labels, links, seq_starts = torch.load("/Users/zhengxinyong/Desktop/labelmodels/downloads/link_hmm_inputs.pt") + print(labels.shape, links.shape) #### change num_labeling_funcs according given NoABS labeling function + acc_prior = 50 + link_hmm = LinkedHMM( + num_classes=4, + num_labeling_funcs=8, + num_linking_funcs=7, + init_acc=0.7, + acc_prior=acc_prior, + balance_prior=100) + # link_hmm.estimate_label_model(labels, links, seq_starts) + link_hmm_saved_fp = f"/Users/zhengxinyong/Desktop/labelmodels/ontonotes/labelmodel_link_hmm_prior_{acc_prior}.pt" + # # torch.save(link_hmm, link_hmm_saved_fp) + link_hmm = torch.load(link_hmm_saved_fp) + print(f"✅ Done loading link hmm with acc_prior={acc_prior}.") + print("get label accuracies:") + print(link_hmm.get_label_accuracies()) + print("get label propensities:") + print(link_hmm.get_label_accuracies()) + print("get link accuracies:") + print(link_hmm.get_link_accuracies()) + print("get link propensities:") + print(link_hmm.get_link_propensities()) + print("get start balance:") + print(link_hmm.get_start_balance()) + print("get transition matrix:") + print(link_hmm.get_transition_matrix()) + + print(link_hmm.get_label_distribution(labels, links, seq_starts)) + + # K = [5000] + # for k in K: + # # viterbi_paths, viterbi_scores = link_hmm.get_k_most_probable_labels(labels[12066:12071, :], links[12066:12071, :], [0], topk=k, return_viterbi_scores=True) + # viterbi_paths, viterbi_scores = link_hmm.get_k_most_probable_labels(labels, links, seq_starts, topk=k, return_viterbi_scores=True) + # # print(seq_starts) + # # print(np.sum(np.exp(viterbi_scores), 0)) + # print(f"✅ Done generating {k} (acc prior {acc_prior}).") + # print(viterbi_scores) + # torch.save(viterbi_paths, f"/Users/zhengxinyong/Desktop/labelmodels/ontonotes/{k}_viterbi_paths_prior_{acc_prior}.pt") + # torch.save(viterbi_scores, f"/Users/zhengxinyong/Desktop/labelmodels/ontonotes/{k}_viterbi_scores_prior_{acc_prior}.pt") + + # viterbi_paths = torch.load("/Users/zhengxinyong/Desktop/labelmodels/ontonotes/5_viterbi_paths.pt") + # viterbi_scores = torch.load("/Users/zhengxinyong/Desktop/labelmodels/ontonotes/5_viterbi_scores.pt") + # print(viterbi_scores) + # print(viterbi_paths.shape) + # print(viterbi_scores.shape) + + #### Ontonotes instance check: ensure that viterbi paths are correct (when topk > possible enumeration of sequences) + # instance 780 only has 4 tokens + # if k > 256: + # self.assertEqual(sum(viterbi_paths[256][seq_starts[780]:seq_starts[781]]), -4) + # self.assertEqual(viterbi_scores[256][780], -1) + + # instance 498, 561, 751, 769, 779, 784, 821, 822, 858 have 5 tokens + + # #### Ontonotes instance check: check scores + # self.assertEqual(viterbi_scores[0][31], -112.71656799316406) + # predictions, scores = link_hmm.get_most_probable_labels(labels, links, seq_starts, return_viterbi_scores=True) + # self.assertEqual(scores[31], -112.71656799316406) + + # #### NECESSARY BUT INSUFFICIENT - comparison between get_most_probable and get_k_most_probable + # self.assertIsNone(np.testing.assert_array_equal(viterbi_paths[0, :], predictions)) + # self.assertIsNone(np.testing.assert_array_equal(viterbi_scores[0, :], scores)) + + + # #### 💻 Laptop Reviews + # labels, links, seq_starts = torch.load("/Users/zhengxinyong/Desktop/labelmodels/downloads/laptop_link_hmm_inputs.pt") + # # print(labels.shape, links.shape) #### change num_labeling_funcs according given NoABS labeling function + # link_hmm_saved_fp = 
f"/Users/zhengxinyong/Desktop/labelmodels/downloads/laptop_esteban_link_hmm.pt" + # link_hmm = torch.load(link_hmm_saved_fp) + # print("get label accuracies:") + # print(link_hmm.get_label_accuracies()) + # print("get label propensities:") + # print(link_hmm.get_label_accuracies()) + # print("get link accuracies:") + # print(link_hmm.get_link_accuracies()) + # print("get link propensities:") + # print(link_hmm.get_link_propensities()) + # print("get start balance:") + # print(link_hmm.get_start_balance()) + # print("get transition matrix:") + # print(link_hmm.get_transition_matrix()) + # print(f"smallest number of tokens in a sequence: {min([seq_starts[i + 1] - seq_starts[i] for i in range(len(seq_starts) - 1)])}") + # print(f"largest number of tokens in a sequence: {max([seq_starts[i + 1] - seq_starts[i] for i in range(len(seq_starts) - 1)])}") + + # K = [1, 2, 3, 4, 5] + # for k in K: + # viterbi_paths, viterbi_scores = link_hmm.get_k_most_probable_labels(labels, links, seq_starts, topk=k, return_viterbi_scores=True) + # torch.save(viterbi_paths, f"/Users/zhengxinyong/Desktop/labelmodels/laptop/{k}_viterbi_paths.pt") + # torch.save(viterbi_scores, f"/Users/zhengxinyong/Desktop/labelmodels/laptop/{k}_viterbi_scores.pt") + # print(f"✅ Laptop Reviews: Done generating {k}.") + + # predictions, scores = link_hmm.get_most_probable_labels(labels, links, seq_starts, return_viterbi_scores=True) + # viterbi_paths = torch.load(f"/Users/zhengxinyong/Desktop/labelmodels/laptop/10_viterbi_paths.pt") + + # self.assertIsNone(np.testing.assert_array_equal(viterbi_paths[0, :], predictions)) + # print(-1 in viterbi_paths) + + # #### 🥼 NCBI + # labels, links, seq_starts = torch.load("/Users/zhengxinyong/Desktop/labelmodels/downloads/ncbi_link_hmm_inputs.pt") + # # print(labels.shape, links.shape) #### change num_labeling_funcs according given NoABS labeling function + # link_hmm_saved_fp = f"/Users/zhengxinyong/Desktop/labelmodels/downloads/ncbi_esteban_link_hmm.pt" + # link_hmm = torch.load(link_hmm_saved_fp) + # print("get label accuracies:") + # print(link_hmm.get_label_accuracies()) + # print("get label propensities:") + # print(link_hmm.get_label_accuracies()) + # print("get link accuracies:") + # print(link_hmm.get_link_accuracies()) + # print("get link propensities:") + # print(link_hmm.get_link_propensities()) + # print("get start balance:") + # print(link_hmm.get_start_balance()) + # print("get transition matrix:") + # print(link_hmm.get_transition_matrix()) + + # print(f"smallest number of tokens in a sequence: {min([seq_starts[i + 1] - seq_starts[i] for i in range(len(seq_starts) - 1)])}") + # print(f"largest number of tokens in a sequence: {max([seq_starts[i + 1] - seq_starts[i] for i in range(len(seq_starts) - 1)])}") + + # for i in range(len(seq_starts) - 1): + # print(seq_starts[i + 1] - seq_starts[i], seq_starts[i], seq_starts[i + 1]) + + # K = [1, 2, 3, 4, 5] + # for k in K: + # viterbi_paths, viterbi_scores = link_hmm.get_k_most_probable_labels(labels, links, seq_starts, topk=k, return_viterbi_scores=True) + # torch.save(viterbi_paths, f"/Users/zhengxinyong/Desktop/labelmodels/ncbi/{k}_viterbi_paths.pt") + # torch.save(viterbi_scores, f"/Users/zhengxinyong/Desktop/labelmodels/ncbi/{k}_viterbi_scores.pt") + # print(f"✅ NCBI: Done generating {k}.") + + # viterbi_paths = torch.load(f"/Users/zhengxinyong/Desktop/labelmodels/ncbi/1_viterbi_paths.pt") + # # print(viterbi_paths[0, 1242:1316]) + # # predictions, scores = link_hmm.get_most_probable_labels(labels, links, seq_starts, 
return_viterbi_scores=True) + # self.assertIsNone(np.testing.assert_array_equal(viterbi_paths[0, :], predictions)) + + + # #### 💿 CDR + # labels, links, seq_starts = torch.load("/Users/zhengxinyong/Desktop/labelmodels/downloads/cdr_link_hmm_inputs.pt") + # # print(labels.shape, links.shape) #### change num_labeling_funcs according given NoABS labeling function + # link_hmm_saved_fp = f"/Users/zhengxinyong/Desktop/labelmodels/downloads/cdr_esteban_link_hmm.pt" + # link_hmm = torch.load(link_hmm_saved_fp) + # print("get label accuracies:") + # print(link_hmm.get_label_accuracies()) + # print("get label propensities:") + # print(link_hmm.get_label_accuracies()) + # print("get link accuracies:") + # print(link_hmm.get_link_accuracies()) + # print("get link propensities:") + # print(link_hmm.get_link_propensities()) + # print("get start balance:") + # print(link_hmm.get_start_balance()) + # print("get transition matrix:") + # print(link_hmm.get_transition_matrix()) + + # print(f"smallest number of tokens in a sequence: {min([seq_starts[i + 1] - seq_starts[i] for i in range(len(seq_starts) - 1)])}") + # print(f"largest number of tokens in a sequence: {max([seq_starts[i + 1] - seq_starts[i] for i in range(len(seq_starts) - 1)])}") + + # K = [1, 2, 3, 4, 5] + # for k in K: + # viterbi_paths, viterbi_scores = link_hmm.get_k_most_probable_labels(labels, links, seq_starts, topk=k, return_viterbi_scores=True) + # torch.save(viterbi_paths, f"/Users/zhengxinyong/Desktop/labelmodels/cdr/{k}_viterbi_paths.pt") + # torch.save(viterbi_scores, f"/Users/zhengxinyong/Desktop/labelmodels/cdr/{k}_viterbi_scores.pt") + # print(f"✅ BC5CDR: Done generating {k}.") + + # viterbi_paths = torch.load(f"/Users/zhengxinyong/Desktop/labelmodels/cdr/10_viterbi_paths.pt") + # predictions, scores = link_hmm.get_most_probable_labels(labels, links, seq_starts, return_viterbi_scores=True) + # self.assertIsNone(np.testing.assert_array_equal(viterbi_paths[0, :], predictions)) + + # print(-1 in viterbi_paths) + + + + # def test_compute_viterbi(self): + # m = 1 # num_seqs + # n1 = 4 # num_labeling_funcs + # n2 = 5 # num_linking_funcs + # k = 3 # num_classes + + # model = LinkedHMM(k, n1, n2) + # with torch.no_grad(): + # model.start_balance[0] = 0 + # model.start_balance[1] = 0.5 + # for i in range(n1): + # model.propensity[i] = 0 + # for j in range(k): + # model.accuracy[i, j] = 1 + # for i in range(n2): + # model.link_propensity[i] = 0 + # model.link_accuracy[i] = 1.5 + # for i in range(k): + # for j in range(k): + # model.transitions[i, j] = 1 if i == j else 0 + + # labels, links, seq_starts, gold = _generate_data( + # m, 20, 20, n1, n2, + # model.get_label_accuracies(), + # model.get_link_accuracies(), + # model.get_label_propensities(), + # model.get_link_propensities(), + # model.get_start_balance(), + # model.get_transition_matrix()) + + # predictions = model.get_most_probable_labels(labels, links, seq_starts) + # scores = model.get_most_probable_labels(labels, links, seq_starts, return_viterbi_scores=True) + + # path_scores_list = model.compute_viterbi(labels, links, seq_starts) + + # for i in range(len(seq_starts)): + # start_idx = seq_starts[i] + # if i == len(seq_starts) - 1: + # end_idx = len(predictions) + # else: + # end_idx = seq_starts[i + 1] + # path = ''.join(map(str, list(predictions[start_idx:end_idx]-1))) + # print(path, scores[i], path_scores_list[i][path]) + # assert scores[i] == path_scores_list[i][path] + + + # # ### TODO: test get_k_most_probable_labels + # # predictions = 
model.get_k_most_probable_labels(labels, links, seq_starts, topk=2) + # # scores = model.get_most_probable_labels(labels, links, seq_starts, return_viterbi_scores=True) + # # path_scores_list = model.compute_viterbi(labels, links, seq_starts) + def _generate_data(num_seqs, min_seq, max_seq, num_label_funcs, num_link_funcs, label_accs, link_accs, label_propensities, link_propensities, start_balance, transitions): # Generates sequence starts - seq_starts = np.zeros((num_seqs,), dtype=np.int) + seq_starts = np.zeros((num_seqs,), dtype=int) total_len = 0 for i in range(num_seqs): seq_len = np.random.randint(min_seq, max_seq + 1) @@ -233,7 +510,7 @@ def _generate_data(num_seqs, min_seq, max_seq, num_label_funcs, num_link_funcs, seq_starts[i + 1] = total_len # Generates sequences of gold labels - gold = np.zeros((total_len,), dtype=np.int) + gold = np.zeros((total_len,), dtype=int) next_start = 0 for i in range(total_len): if next_start < len(seq_starts) and i == seq_starts[next_start]: @@ -286,6 +563,7 @@ def _generate_data(num_seqs, min_seq, max_seq, num_label_funcs, num_link_funcs, return labels, links, seq_starts, gold + if __name__ == '__main__': unittest.main() diff --git a/wiser_tanl/linked_hmm.py b/wiser_tanl/linked_hmm.py new file mode 100644 index 0000000..50213e1 --- /dev/null +++ b/wiser_tanl/linked_hmm.py @@ -0,0 +1,132 @@ +import sys +mypath = "/Users/zhengxinyong/Desktop/labelmodels" +sys.path.append(mypath) + + +from labelmodels import LinkedHMM, LearningConfig +import numpy as np +from scipy import sparse +from scipy import special +import torch +from tqdm import tqdm + +def sample_k_labels(input_fp, output_fp, dataset, k=1000): + # TODO: integrate into LinkedHMM + labels, links, seq_starts = torch.load(f"{input_fp}/{dataset}_link_hmm_inputs.pt") + model_saved_fp = f"{input_fp}/{dataset}_link_hmm.pt" + model = torch.load(model_saved_fp) + print(f"✅ Done loading link hmm.") + print("get label accuracies:") + print(model.get_label_accuracies()) + + # p_unary, p_pairwise = model.get_label_distribution(labels, links, seq_starts) + # torch.save([p_unary, p_pairwise], f"{output_fp}/emp_dist/{dataset}_unary_pairwise.pt") + p_unary, p_pairwise = torch.load(f"{output_fp}/emp_dist/{dataset}_unary_pairwise.pt") + + print(f"⛏ Sampling k={k} label sequences") + paths = np.zeros((k, p_unary.shape[0]), dtype=int) + instance_idx = -1 + num_choices = p_unary.shape[1] + for i in tqdm(range(k)): + for j in range(p_unary.shape[0]): + if j in seq_starts: + instance_idx += 1 + label = np.random.choice(num_choices, size=1, p=p_unary[j]/p_unary[j].sum()) + paths[i][j] = label[0] + 1 + else: + prev_label = paths[i][j - 1] - 1 + next_label_dist = p_pairwise[j - 1][prev_label] + label = np.random.choice(num_choices, size=1, p=next_label_dist/next_label_dist.sum()) + paths[i][j] = label[0] + 1 + instance_idx = -1 + + torch.save(paths, f"{output_fp}/{dataset}_{k}_sampled_paths.pt") + return paths + +def sampling_empirical_distribution(input_fp, output_fp, dataset, k=100): + paths = torch.load(f"{output_fp}/{dataset}_{k}_sampled_paths.pt") + model_saved_fp = f"{input_fp}/{dataset}_link_hmm.pt" + model = torch.load(model_saved_fp) + empirical_dist = np.zeros((paths.shape[1], model.get_label_accuracies().shape[1])) + print("empirical_dist.shape:", empirical_dist.shape) + + for path_k in tqdm(range(paths.shape[0])): + path = paths[path_k] + + for i in range(len(path)): + if path[i] < 0: + continue + + empirical_dist[i][path[i] - 1] += 1 + empirical_dist = empirical_dist / k + 
np.save(open(f"{output_fp}/emp_dist/{dataset}_sampled_{k}.npy", "wb"), empirical_dist) + +def top_k_empirical_distribution(input_fp, output_fp, dataset, acc_prior=50, k=50): + labels, links, seq_starts = torch.load(f"{input_fp}/{dataset}_link_hmm_inputs.pt") + link_hmm_saved_fp = f"{input_fp}/labelmodel_link_hmm_prior_{acc_prior}.pt" + link_hmm = torch.load(link_hmm_saved_fp) + print(f"✅ Done loading link hmm with acc_prior={acc_prior}.") + print("get label accuracies:") + print(link_hmm.get_label_accuracies()) + # print("get label propensities:") + # print(link_hmm.get_label_accuracies()) + # print("get link accuracies:") + # print(link_hmm.get_link_accuracies()) + # print("get link propensities:") + # print(link_hmm.get_link_propensities()) + # print("get start balance:") + # print(link_hmm.get_start_balance()) + # print("get transition matrix:") + # print(link_hmm.get_transition_matrix()) + + # getting empirical distribution + print(f"Getting empirical distribution (k = {k}):") + viterbi_paths = torch.load(f"{output_fp}/{k}_viterbi_paths_prior_50.pt") + viterbi_scores = torch.load(f"{output_fp}/{k}_viterbi_scores_prior_50.pt") + viterbi_scores = np.ma.masked_where(viterbi_scores == -1, viterbi_scores) + + empirical_dist = np.zeros((viterbi_paths.shape[1], link_hmm.get_label_accuracies().shape[1])) + print("empirical_dist.shape:", empirical_dist.shape) + + for path_k in tqdm(range(viterbi_paths.shape[0])): + path = viterbi_paths[path_k] + + for i in range(len(path)): + if path[i] < 0: + continue + + if i in seq_starts: + score = viterbi_scores[path_k][np.where(seq_starts == i)[0][0]] + all_scores = viterbi_scores[:, np.where(seq_starts == i)[0][0]] + all_scores = all_scores[all_scores.mask == False] + total_score = special.logsumexp(all_scores.filled()) + + empirical_dist[i][path[i] - 1] += np.exp(score - total_score) # weighted by scores + + print(empirical_dist[17065:17069]) + np.save(open(f"{output_fp}/emp_dist/{dataset}_top_{k}.npy", "wb"), empirical_dist) + +def get_posterior_marginal(dataset): + # right now I treat unary marginal as posterior marginal + ... + + +if __name__ == '__main__': + # top_k_empirical_distribution( + # input_fp="/Users/zhengxinyong/Desktop/labelmodels/inputs/ontonotes", + # output_fp="/Users/zhengxinyong/Desktop/labelmodels/outputs/ontonotes", + # dataset="ontonotes") + + + for k in [2000, 3000, 10000]: + sample_k_labels( + input_fp="/Users/zhengxinyong/Desktop/labelmodels/inputs/ontonotes", + output_fp="/Users/zhengxinyong/Desktop/labelmodels/outputs/ontonotes", + dataset="ontonotes", + k=k) + + sampling_empirical_distribution( + input_fp="/Users/zhengxinyong/Desktop/labelmodels/inputs/ontonotes", + output_fp="/Users/zhengxinyong/Desktop/labelmodels/outputs/ontonotes", + dataset="ontonotes", + k=k)
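As a side note on `top_k_empirical_distribution` above: each of the k decoded paths contributes its renormalized probability mass to the class it assigns at every token of its sequence. Below is a minimal sketch of that weighting in isolation, with assumed toy paths and scores (not taken from the diff).

```python
import numpy as np
from scipy import special

num_classes = 3
paths = np.array([[1, 2, 1], [1, 1, 1]])   # top-2 label paths for one 3-token sequence
scores = np.array([-1.2, -2.3])            # normalized log-probabilities of those paths

# softmax over the k path scores: weight each path by its share of the retained mass
weights = np.exp(scores - special.logsumexp(scores))

emp = np.zeros((paths.shape[1], num_classes))
for k, path in enumerate(paths):
    for i, label in enumerate(path):
        emp[i, label - 1] += weights[k]     # labels are 1-indexed, classes 0-indexed

print(emp)  # each row sums to 1 because the k weights are renormalized to a distribution
```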