function loglike = loglikelihood(B, Y, X, robust, g00, g11,l2_lamb,l1_lamb)
%LOGLIKELIHOOD Penalized mean negative log-likelihood of a logistic model.
%
%   loglike = LOGLIKELIHOOD(B, Y, X, robust, g00, g11, l2_lamb, l1_lamb)
%
%   Inputs
%     B       - coefficient vector; the linear predictor is X*B.
%     Y       - observed labels used as an indicator (0/1); it plays the
%               role of (y+1)/2 in the paper the equations refer to.
%     X       - design matrix (presumably n-by-numel(B); confirm with caller).
%     robust  - false/0: plain logistic likelihood (eqn 1);
%               true/1 : label-noise-robust likelihood (eqn 4).
%     g00     - P(estimate = 0 | true label = 0), i.e. specificity.
%     g11     - P(estimate = 1 | true label = 1), i.e. sensitivity.
%     l2_lamb - (optional, default 0) weight on sum(B.^2).
%     l1_lamb - (optional, default 0) weight on sum(abs(B)).
%
%   Output
%     loglike - negative log-likelihood averaged over numel(Y), plus the
%               L2/L1 penalties when B has more than one element. The sign
%               is flipped so that the value can be minimized.
%
%   A warning is issued for each lambda that is not supplied.

% Default the optional penalty weights. The 7th argument is l2_lamb and the
% 8th is l1_lamb, so each guard must default the argument that is actually
% missing (and must not clobber one that was supplied).
if nargin < 7
    warning('loglikelihood:missingL2Lambda', ...
        'LOGLIKELIHOOD missing lambda argument: using l2_lamb = 0');
    l2_lamb = 0;
end
if nargin < 8
    warning('loglikelihood:missingL1Lambda', ...
        'LOGLIKELIHOOD missing lambda argument: using l1_lamb = 0');
    l1_lamb = 0;
end

% g00: P(estimate = 0 | true label = 0) aka specificity (about .7)
g01 = 1-g00;   % P(estimate = 1 | true label = 0)

% g11: P(estimate = 1 | true label = 1) aka sensitivity (about 1)
g10 = 1-g11;   % P(estimate = 0 | true label = 1)

% Logistic (sigmoid) prediction for every row of X.
Yhat = 1 ./ (1 + exp(- X* B));

if ~robust
    % eqn (1), line 3 "Simplified Labeling ..."
    loglike = (-1 * sum( Y.*log(Yhat) + (1-Y).*log(1-Yhat)))/numel(Y);
elseif robust
    % eqn (4): mix the predicted probabilities through the label-noise
    % rates before taking the log.
    loglike = (- 1 * sum(Y.*log(g11*Yhat+g01*(1-Yhat)) + ...
        (1-Y).*log(g00*(1-Yhat)+g10*Yhat))) / numel(Y);
else
    % Reached when robust is empty or a non-uniform array, for which
    % neither of the conditions above evaluates to true.
    error('Need to enter robust parameter as 0/1');
end

% apply lambda (only when B has more than one coefficient)
if numel(B)>1
    l2norm = sum(B.^2);
    l1norm = sum(abs(B));
    loglike = loglike + l2_lamb*l2norm + l1_lamb*l1norm;
end

% % debugging
% if l1_lamb>0 || l2_lamb>0;
%      fprintf('L(Y|XB): %4f   (B)^2: %4f (%4f)    |B|: %4f (%4f)\n',...
%          loglike, l2norm, l2_lamb*l2norm, l1norm, l1_lamb*l1norm);
% end

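% The log-likelihood is multiplied by -1 because this value is meant to be
% minimized (minimizing the negative log-likelihood maximizes the likelihood).
%
% Note that in both cases Y is an indicator (0/1) and plays the same role
% as (y+1)/2 in the paper.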