13 #ifndef MMTF_STRUCTURE_DATA_H 14 #define MMTF_STRUCTURE_DATA_H 21 #include <msgpack.hpp> 30 #define MMTF_SPEC_VERSION_MAJOR 1 31 #define MMTF_SPEC_VERSION_MINOR 0 109 for(
size_t i = 16; i--;) {
110 if ( matrix[i] != c.
matrix[i] ) {
118 MSGPACK_DEFINE_MAP(chainIndexList, matrix);
136 MSGPACK_DEFINE_MAP(transformList, name);
204 bool hasConsistentData(
bool verbose=
false, uint32_t chain_name_max_length = 4)
const;
214 std::string print(std::string delim=
"\t");
268 template <
typename T>
276 template <
typename T>
278 template <
typename T>
288 template <
typename T>
312 bool isValidDateFormatOptional(
const std::string& s) {
316 if (s.length() != 10)
return false;
318 if (s[4] !=
'-' || s[7] !=
'-')
return false;
320 std::istringstream is(s);
323 if (is >> y >> dash1 >> m >> dash2 >> d) {
324 return (dash1 ==
'-' && dash2 ==
'-');
332 bool hasRightSizeOptional(
const std::vector<T>& v,
int exp_size) {
337 template<
typename T,
typename Tnum>
338 bool hasValidIndices(
const T* v,
size_t size, Tnum num) {
340 for (
size_t i = 0; i < size; ++i) {
341 if (v[i] < T(0) || v[i] >= tnum)
return false;
345 template<
typename T,
typename Tnum>
346 bool hasValidIndices(
const std::vector<T>& v, Tnum num) {
347 if (v.empty())
return true;
348 else return hasValidIndices(&v[0], v.size(), num);
356 std::stringstream version;
358 return version.str();
362 std::stringstream ss(version_string);
369 template <
typename T>
372 template <
typename T>
374 return (value == getDefaultValue<T>());
376 template <
typename T>
378 return value.empty();
382 return value.empty();
385 template <
typename T>
387 value = getDefaultValue<T>();
403 const char* hetatm_type[] = {
404 "D-BETA-PEPTIDE, C-GAMMA LINKING",
405 "D-GAMMA-PEPTIDE, C-DELTA LINKING",
406 "D-PEPTIDE COOH CARBOXY TERMINUS",
407 "D-PEPTIDE NH3 AMINO TERMINUS",
410 "D-SACCHARIDE 1,4 AND 1,4 LINKING",
411 "D-SACCHARIDE 1,4 AND 1,6 LINKING",
412 "DNA OH 3 PRIME TERMINUS",
413 "DNA OH 5 PRIME TERMINUS",
417 "L-BETA-PEPTIDE, C-GAMMA LINKING",
418 "L-GAMMA-PEPTIDE, C-DELTA LINKING",
419 "L-PEPTIDE COOH CARBOXY TERMINUS",
420 "L-PEPTIDE NH3 AMINO TERMINUS",
423 "L-SACCHARIDE 1,4 AND 1,4 LINKING",
424 "L-SACCHARIDE 1,4 AND 1,6 LINKING",
425 "RNA OH 3 PRIME TERMINUS",
426 "RNA OH 5 PRIME TERMINUS",
434 for (
int i=0; hetatm_type[i]; ++i) {
435 if (strcmp(type,hetatm_type[i]) == 0)
return true;
455 mmtfProducer =
"mmtf-cpp library (github.com/rcsb/mmtf-cpp)";
460 if (!hasRightSizeOptional(unitCell, 6)) {
462 std::cout <<
"inconsistent unitCell (unitCell length != 6)" << std::endl;
467 if (!isValidDateFormatOptional(depositionDate)) {
469 std::cout <<
"inconsistent depositionDate (does not match 'YYYY-MM-DD' " 470 "or empty)" << std::endl;
474 if (!isValidDateFormatOptional(releaseDate)) {
476 std::cout <<
"inconsistent releaseDate (does not match 'YYYY-MM-DD' " 477 "or empty)" << std::endl;
482 for (
size_t i = 0; i < ncsOperatorList.size(); ++i) {
483 if ((
int)ncsOperatorList[i].size() != 16) {
485 std::cout <<
"inconsistent ncsOperatorList idx: " << i <<
" found size: " 486 << ncsOperatorList[i].size() <<
" != 16" << std::endl;
492 for (
size_t i = 0; i < bioAssemblyList.size(); ++i) {
498 std::cout <<
"inconsistent BioAssemby transform i j: " << i
499 <<
" " << j << std::endl;
505 for (
size_t i = 0; i < entityList.size(); ++i) {
506 const Entity& ent = entityList[i];
509 std::cout <<
"inconsistent entity idx: " << i << std::endl;
515 for (
size_t i = 0; i < groupList.size(); ++i) {
520 std::cout <<
"inconsistent group::atomNameList size at idx: " 527 std::cout <<
"inconsistent group::elementList size at idx: " 535 std::cout <<
"inconsistent group::bondAtomList size: " <<
536 g.
bondAtomList.size() <<
" != group::bondOrderList size(*2): " <<
544 std::cout <<
"inconsistent group::bondAtomList indices (not all in [0, " 545 << num_atoms - 1 <<
"]) at idx: " << i << std::endl;
554 std::cout <<
"inconsistent bondAtomList size: " <<
555 bondAtomList.size() <<
" != bondOrderList size(*2): " <<
563 std::cout <<
"inconsistent bondAtomList indices (not all in [0, " 564 << numAtoms - 1 <<
"])" << std::endl;
569 if ((
int)xCoordList.size() != numAtoms) {
571 std::cout <<
"inconsistent xCoordList size" << std::endl;
575 if ((
int)yCoordList.size() != numAtoms) {
577 std::cout <<
"inconsistent yCoordList size" << std::endl;
581 if ((
int)zCoordList.size() != numAtoms) {
583 std::cout <<
"inconsistent zCoordList size" << std::endl;
587 if (!hasRightSizeOptional(bFactorList, numAtoms)) {
589 std::cout <<
"inconsistent bFactorList size" << std::endl;
593 if (!hasRightSizeOptional(atomIdList, numAtoms)) {
595 std::cout <<
"inconsistent atomIdList size" << std::endl;
599 if (!hasRightSizeOptional(altLocList, numAtoms)) {
601 std::cout <<
"inconsistent altLocList size" << std::endl;
605 if (!hasRightSizeOptional(occupancyList, numAtoms)) {
607 std::cout <<
"inconsistent occupancyList size" << std::endl;
611 if ((
int)groupIdList.size() != numGroups) {
613 std::cout <<
"inconsistent groupIdList size" << std::endl;
617 if ((
int)groupTypeList.size() != numGroups) {
619 std::cout <<
"inconsistent groupTypeList size" << std::endl;
623 if (!hasRightSizeOptional(secStructList, numGroups)) {
625 std::cout <<
"inconsistent secStructList size" << std::endl;
629 if (!hasRightSizeOptional(insCodeList, numGroups)) {
631 std::cout <<
"inconsistent insCodeList size" << std::endl;
635 if (!hasRightSizeOptional(sequenceIndexList, numGroups)) {
637 std::cout <<
"inconsistent sequenceIndexList size" << std::endl;
641 if ((
int)chainIdList.size() != numChains) {
643 std::cout <<
"inconsistent chainIdList size" << std::endl;
647 if (!hasRightSizeOptional(chainNameList, numChains)) {
649 std::cout <<
"inconsistent chainNameList size" << std::endl;
653 if ((
int)groupsPerChain.size() != numChains) {
655 std::cout <<
"inconsistent groupsPerChain size" << std::endl;
659 if ((
int)chainsPerModel.size() != numModels) {
661 std::cout <<
"inconsistent chainsPerModel size" << std::endl;
666 if (!hasValidIndices(groupTypeList, groupList.size())) {
668 std::cout <<
"inconsistent groupTypeList indices (not all in [0, " 669 << groupList.size() - 1 <<
"])" << std::endl;
674 std::vector<int32_t> sequenceIndexSize(numChains);
675 for (
size_t i = 0; i < entityList.size(); ++i) {
676 const Entity& ent = entityList[i];
682 int bond_count_from_atom = 0;
683 int bond_count_from_order = 0;
684 bool all_bond_orderLists_are_default =
true;
685 bool all_bond_atomLists_are_default =
true;
687 all_bond_orderLists_are_default =
false;
691 all_bond_atomLists_are_default =
false;
698 for (
int model_idx = 0; model_idx < numModels; ++model_idx) {
700 for (
int j = 0; j < chainsPerModel[model_idx]; ++j, ++chain_idx) {
702 if (chainIdList[chain_idx].size() > chain_name_max_length) {
704 std::cout <<
"inconsistent chainIdList size at chain_idx: " 705 << chain_idx <<
" size: " 706 << chainIdList[chain_idx].size() << std::endl;
711 && chainNameList[chain_idx].size() > chain_name_max_length) {
713 std::cout <<
"inconsistent chainNameList size at chain_idx:" 714 << chain_idx <<
" size: " 715 << chainNameList[chain_idx].size() << std::endl;
720 for (
int k = 0; k < groupsPerChain[chain_idx]; ++k, ++group_idx) {
723 const int32_t idx = sequenceIndexList[group_idx];
725 if (idx < -1 || idx >= sequenceIndexSize[chain_idx]) {
727 std::cout <<
"inconsistent sequenceIndexSize at" 728 " chain_idx: " << chain_idx << std::endl;
734 const GroupType& group = groupList[groupTypeList[group_idx]];
738 all_bond_orderLists_are_default =
false;
742 all_bond_atomLists_are_default =
false;
750 if (!all_bond_orderLists_are_default) {
751 if (bond_count_from_order != numBonds) {
753 std::cout <<
"inconsistent numBonds vs bond order count" << std::endl;
758 if (!all_bond_atomLists_are_default) {
759 if (bond_count_from_atom != numBonds) {
761 std::cout <<
"inconsistent numBonds vs bond atom list count" << std::endl;
766 if (chain_idx != numChains) {
768 std::cout <<
"inconsistent numChains" << std::endl;
772 if (group_idx != numGroups) {
774 std::cout <<
"inconsistent numGroups size" << std::endl;
778 if (atom_idx != numAtoms) {
780 std::cout <<
"inconsistent numAtoms size" << std::endl;
789 std::ostringstream out;
796 for (
int i = 0; i < numModels; i++, modelIndex++) {
798 for (
int j = 0; j < chainsPerModel[modelIndex]; j++, chainIndex++) {
800 for (
int k = 0; k < groupsPerChain[chainIndex]; k++, groupIndex++) {
802 groupList[groupTypeList[groupIndex]];
805 for (
int l = 0; l < groupAtomCount; l++, atomIndex++) {
808 out <<
"HETATM" << delim;
810 out <<
"ATOM" << delim;
813 out << std::setfill(
'0') << std::internal << std::setw(6) <<
814 std::right << atomIdList[atomIndex] << delim;
815 }
else out <<
"." << delim;
820 if ( altLocList[atomIndex] ==
' ' ||
821 altLocList[atomIndex] == 0x00 )
823 else out << altLocList[atomIndex] << delim;
824 }
else out <<
"." << delim;
828 out << chainIdList[chainIndex] << delim;
830 out << chainNameList[chainIndex];
832 }
else out <<
"." << delim;
834 out << groupIdList[groupIndex] << delim;
837 if ( insCodeList[groupIndex] ==
' ' ||
838 insCodeList[groupIndex] == 0x00 )
840 else out << int(insCodeList[groupIndex]) << delim;
843 out << std::fixed << std::setprecision(3);
844 out << xCoordList[atomIndex] << delim;
845 out << yCoordList[atomIndex] << delim;
846 out << zCoordList[atomIndex] << delim;
850 out << bFactorList[atomIndex] << delim;
851 }
else out <<
"." << delim;
854 out << occupancyList[atomIndex] << delim;
855 }
else out <<
"." << delim;
std::vector< int8_t > bondOrderList
Definition: structure_data.hpp:174
std::vector< std::string > elementList
Definition: structure_data.hpp:52
std::vector< int8_t > secStructList
Definition: structure_data.hpp:184
std::vector< std::string > atomNameList
Definition: structure_data.hpp:51
bool isVersionSupported(const std::string &version_string)
Check if version is supported (minor revisions ok, major ones not)
Definition: structure_data.hpp:361
std::vector< char > altLocList
Definition: structure_data.hpp:180
bool operator==(GroupType const &c) const
Definition: structure_data.hpp:59
std::string mmtfVersion
Definition: structure_data.hpp:152
std::vector< std::string > experimentalMethods
Definition: structure_data.hpp:163
int32_t numGroups
Definition: structure_data.hpp:169
std::vector< float > yCoordList
Definition: structure_data.hpp:176
std::vector< int32_t > groupsPerChain
Definition: structure_data.hpp:189
std::vector< char > insCodeList
Definition: structure_data.hpp:185
std::string structureId
Definition: structure_data.hpp:156
std::vector< GroupType > groupList
Definition: structure_data.hpp:172
Top level MMTF data container.
Definition: structure_data.hpp:151
std::vector< int32_t > groupTypeList
Definition: structure_data.hpp:183
std::string type
Definition: structure_data.hpp:80
std::vector< int32_t > groupIdList
Definition: structure_data.hpp:182
bool hasConsistentData(bool verbose=false, uint32_t chain_name_max_length=4) const
Check consistency of structural data.
Definition: structure_data.hpp:458
#define MMTF_SPEC_VERSION_MINOR
Definition: structure_data.hpp:31
std::vector< Transform > transformList
Definition: structure_data.hpp:127
std::vector< int32_t > chainsPerModel
Definition: structure_data.hpp:190
std::vector< Entity > entityList
Definition: structure_data.hpp:162
std::string description
Definition: structure_data.hpp:79
std::string groupName
Definition: structure_data.hpp:55
std::vector< int32_t > bondAtomList
Definition: structure_data.hpp:53
bool operator==(BioAssembly const &c) const
Definition: structure_data.hpp:130
bool operator==(StructureData const &c) const
Compare two StructureData objects for equality.
Definition: structure_data.hpp:220
char singleLetterCode
Definition: structure_data.hpp:56
bool operator==(Entity const &c) const
Definition: structure_data.hpp:83
int32_t numChains
Definition: structure_data.hpp:170
std::vector< int32_t > formalChargeList
Definition: structure_data.hpp:50
void setDefaultValue(T &value)
Set default value to given type.
Definition: structure_data.hpp:386
Data store for the biological assembly annotation.
Definition: structure_data.hpp:126
std::vector< int32_t > atomIdList
Definition: structure_data.hpp:179
std::vector< std::string > chainNameList
Definition: structure_data.hpp:188
std::vector< std::vector< float > > ncsOperatorList
Definition: structure_data.hpp:160
float resolution
Definition: structure_data.hpp:164
Group (residue) level data store.
Definition: structure_data.hpp:49
float rFree
Definition: structure_data.hpp:165
int32_t numBonds
Definition: structure_data.hpp:167
std::vector< float > xCoordList
Definition: structure_data.hpp:175
Definition: binary_decoder.hpp:24
std::string chemCompType
Definition: structure_data.hpp:57
std::string sequence
Definition: structure_data.hpp:81
std::vector< float > zCoordList
Definition: structure_data.hpp:177
std::vector< float > unitCell
Definition: structure_data.hpp:154
std::string releaseDate
Definition: structure_data.hpp:159
std::string name
Definition: structure_data.hpp:128
int32_t numModels
Definition: structure_data.hpp:171
std::string depositionDate
Definition: structure_data.hpp:158
std::vector< std::string > chainIdList
Definition: structure_data.hpp:187
#define MMTF_SPEC_VERSION_MAJOR
MMTF spec version which this library implements.
Definition: structure_data.hpp:30
T getDefaultValue()
Get default value for given type.
Definition: structure_data.hpp:370
int32_t numAtoms
Definition: structure_data.hpp:168
std::vector< BioAssembly > bioAssemblyList
Definition: structure_data.hpp:161
std::vector< int32_t > chainIndexList
Definition: structure_data.hpp:78
std::string print(std::string delim="\t")
Read out the contents of mmtf::StructureData in a PDB-like fashion. Columns are in order: ATOM/HETATM ...
Definition: structure_data.hpp:788
float rWork
Definition: structure_data.hpp:166
std::vector< int8_t > bondOrderList
Definition: structure_data.hpp:54
bool isDefaultValue(const T &value)
Definition: structure_data.hpp:373
std::vector< int32_t > sequenceIndexList
Definition: structure_data.hpp:186
std::string title
Definition: structure_data.hpp:157
std::string getVersionString()
Get string representation of MMTF spec version implemented here.
Definition: structure_data.hpp:355
StructureData()
Construct object with default values set.
Definition: structure_data.hpp:442
Entity type.
Definition: structure_data.hpp:77
std::string spaceGroup
Definition: structure_data.hpp:155
std::string mmtfProducer
Definition: structure_data.hpp:153
std::vector< float > bFactorList
Definition: structure_data.hpp:178
std::vector< int32_t > bondAtomList
Definition: structure_data.hpp:173
std::vector< float > occupancyList
Definition: structure_data.hpp:181
bool is_hetatm(const char *type)
Check if type is hetatm.
Definition: structure_data.hpp:402