function [dataset,filter]=conn_bidsdir(filenames,varargin) % CONN_BIDSDIR lists contents of BIDS folder % % dataset=conn_bidsdir(rootfolder) % % e.g. conn_bidsdir /data/myconnectome % % if nargin<1||isempty(filenames), filenames=pwd; end % bids filename spec 2018/7 %spec={'sub','ses','task','acq','ce','rec','dir','run','mod','echo','recording','proc'}; spec={'sub','ses','task','acq','ce','rec','dir','run','mod','echo','recording','proc','space','desc'}; filter={}; if isstruct(filenames) dataset=filenames; if ~isempty(varargin)&&isequal(varargin{1},'noempty'), noempty=true; varargin=varargin(2:end); else noempty=false; end if ~isempty(varargin)&&isequal(varargin{1},'forcefirst'), forcefirst=true; varargin=varargin(2:end); else forcefirst=false; end filter=varargin; filtervalid=true(size(filter)); select=true; for n=1:2:numel(varargin)-1 if isempty(varargin{n+1}), temp=true(size(dataset.data.(varargin{n}))); else temp=ismember(dataset.data.(varargin{n}),varargin{n+1}); end if ~isempty(temp)&&~any(temp)&&~isempty(varargin{n+1}) % special case regexp wildcards or #EMPTY keyword if isequal(varargin{n+1},'#EMPTY'), temp=select&cellfun('length',dataset.data.(varargin{n}))==0; elseif ischar(varargin{n+1})&&any(varargin{n+1}=='*'), temp=select&cellfun('length',regexp(dataset.data.(varargin{n}),varargin{n+1})); elseif iscell(varargin{n+1})&&numel(varargin{n+1})==1&&ischar(varargin{n+1}{1})&&any(char(varargin{n+1}{1})=='*'), temp=select&cellfun('length',regexp(dataset.data.(varargin{n}),varargin{n+1}{1})); end end temp=select&temp; if forcefirst&&n>1, if any(temp~=select), filtervalid([n,n+1])=false; end elseif noempty, % skip filter conditions that lead to empty selection if any(temp), select=temp; else filtervalid([n,n+1])=false; end else select=temp; end end filter=filter(filtervalid); if any(~select) fnames=fieldnames(dataset.data); for n=1:numel(fnames) dataset.data.(fnames{n})=dataset.data.(fnames{n})(select); end end else files_all=conn_dir(fullfile(filenames,'sub-*'),'-cell','-inf','-sort'); if isempty(files_all),fprintf(sprintf('warning: no sub-* files found in %s',filenames));dataset=[];return;end isgz=cellfun('length',regexp(files_all,'\.gz$'))>0; files_all=regexprep(files_all,'\.gz$',''); [files_all,nill,idx]=unique(files_all,'stable'); isgz=accumarray(idx(:),isgz(:),[],@min)>0; % note: disregards [filename.ext].gz if [filename.ext] exists [files_all_path,files_all_name,files_all_ext]=cellfun(@fileparts,files_all,'uni',0); files_all_ext(isgz)=cellfun(@(x)[x,'.gz'],files_all_ext(isgz),'uni',0); files_all(isgz)=cellfun(@(x)[x,'.gz'],files_all(isgz),'uni',0); str='^';for n=1:numel(spec), if n>1, str=[str,'(_']; else str=[str,'(']; end; str=[str,spec{n},'-[^_\.]+)?']; end; str=[str,'(_[^_]+)*?$']; files_parts=regexp(files_all_name,str,'tokens','once'); files_parts=cat(1,files_parts{:}); dataset.data.file=files_all(:); dataset.data.description=regexprep(arrayfun(@(n)[files_parts{n,[size(files_parts,2),2:size(files_parts,2)-1]}],1:size(files_parts,1),'uni',0),'_+',' '); dataset.data.contents=regexprep(files_parts(:,end),'^_+',''); [nill,dataset.data.folder]=cellfun(@fileparts,files_all_path(:),'uni',0); dataset.data.format=regexprep(files_all_ext(:),'^\.+',''); dataset.data.series=regexprep(cellfun(@(a,b,c)[a,b,c],... files_parts(:,find(ismember(spec,'ses'),1)),... files_parts(:,find(ismember(spec,'task'),1)),... files_parts(:,find(ismember(spec,'run'),1)),... 'uni',0),'^_+',''); for n=1:numel(spec), dataset.data.(spec{n})=regexprep(files_parts(:,n),['^_?',spec{n},'-'],''); end end for n=[{'file','description','contents','folder','format','series'},spec] dataset.dict.(n{1})=unique(dataset.data.(n{1})(cellfun('length',dataset.data.(n{1}))>0))'; if isempty(dataset.dict.(n{1})), dataset.dict.(n{1})={}; end end