libzypp  17.38.13
RepomdFileReader.cc
Go to the documentation of this file.
1 /*---------------------------------------------------------------------\
2 | ____ _ __ __ ___ |
3 | |__ / \ / / . \ . \ |
4 | / / \ V /| _/ _/ |
5 | / /__ | | | | | | |
6 | /_____||_| |_| |_| |
7 | |
8 \---------------------------------------------------------------------*/
12 #include <iostream>
13 #include <utility>
14 
15 #include <zypp/ZYppCallbacks.h>
16 #include <zypp-core/base/String.h>
18 #include <zypp-core/base/Regex.h>
19 
20 #include <zypp-core/Pathname.h>
21 #include <zypp-core/Date.h>
22 #include <zypp-core/Url.h>
23 #include <zypp/CheckSum.h>
24 #include <zypp/parser/xml/Reader.h>
25 
27 
28 #undef ZYPP_BASE_LOGGER_LOGGROUP
29 #define ZYPP_BASE_LOGGER_LOGGROUP "parser::yum"
30 
31 using std::endl;
32 using namespace zypp::xml;
33 
34 namespace zypp
35 {
36  namespace parser
37  {
38  namespace yum
39  {
40 
41 
43  //
44  // CLASS NAME : RepomdFileReader::Impl
45  //
47  {
48  public:
50  Impl(const Pathname &repomd_file, ProcessResource &&callback )
51  : _callback( std::move(callback) )
52  , _repomdFile( repomd_file )
53  {
54  Reader reader( repomd_file );
55  MIL << "Reading " << repomd_file << endl;
56  reader.foreachNode( bind( &RepomdFileReader::Impl::consumeNode, this, _1 ) );
57  }
58 
/**
 * Node handler passed to Reader::foreachNode by the constructor; it is
 * called with the reader positioned on the node to process.
 * The implementation in this file returns \c true on every path.
 */
62  bool consumeNode( Reader & reader_r );
63 
64 
66  const std::set<std::string> & keywords() const
67  { return _keywords; }
68 
69  private:
71  CheckSum getChecksum( Reader & reader_r )
72  { return CheckSum( reader_r->getAttribute("type").asString(), reader_r.nodeText().asString() ); }
73 
75  ByteCount getSize( Reader & reader_r )
76  { return ByteCount( str::strtonum<ByteCount::SizeType>( reader_r.nodeText().asString() ) ); }
77 
78 
79  private:
82 
84  std::string _typeStr;
85 
88 
89  std::set<std::string> _keywords;
90 
92  bool _discardDataEntry = false;
93  };
95 
96  /*
97  * xpath and multiplicity of processed nodes are included in the code
98  * for convenience:
99  *
100  * // xpath: <xpath> (?|*|+)
101  *
102  * if multiplicity is omitted, then the node has multiplicity 'one'.
103  */
104 
105  // --------------------------------------------------------------------------
106 
107  bool RepomdFileReader::Impl::consumeNode( Reader & reader_r )
108  {
109  if ( reader_r->nodeType() == XML_READER_TYPE_ELEMENT && not _discardDataEntry )
110  {
111  // xpath: /repomd
112  if ( reader_r->name() == "repomd" )
113  {
114  return true;
115  }
116 
117  // xpath: /repomd/data (+)
118  if ( reader_r->name() == "data" )
119  {
120  _typeStr = reader_r->getAttribute("type").asString();
121  return true;
122  }
123 
124  // xpath: /repomd/location
125  if ( reader_r->name() == "location" )
126  {
127  Pathname location { reader_r->getAttribute("href").asString() };
128  if ( location.relativeDotDot() ) {
129  // Don't accept downloadable data outside repo root
130  JobReport::warning( str::sconcat( _repomdFile,": data type ",_typeStr,": hostile location ",location," => discard data entry" ) );
131  pWAR( "Hostile location:", _typeStr, location, "=>", "discard data entry" );
132  _discardDataEntry = true;
133  return true;
134  }
135  _location.setLocation( std::move(location), 1 );
136 
137  // ignoring attribute xml:base
138  return true;
139  }
140 
141  // xpath: /repomd/checksum
142  if ( reader_r->name() == "checksum" )
143  {
144  _location.setChecksum( getChecksum( reader_r ) );
145  return true;
146  }
147 
148  // xpath: /repomd/header-checksum
149  if ( reader_r->name() == "header-checksum" )
150  {
151  _location.setHeaderChecksum( getChecksum( reader_r ) );
152  return true;
153  }
154 
155  // xpath: /repomd/timestamp
156  if ( reader_r->name() == "timestamp" )
157  {
158  // ignore it
159  return true;
160  }
161 
162  // xpath: /repomd/size
163  if ( reader_r->name() == "size" )
164  {
165  _location.setDownloadSize( getSize( reader_r ) );
166  return true;
167  }
168 
169  // xpath: /repomd/header-size
170  if ( reader_r->name() == "header-size" )
171  {
172  _location.setHeaderSize( getSize( reader_r ) );
173  return true;
174  }
175 
176  // xpath: /tags/content
177  if ( reader_r->name() == "content" )
178  {
179  const auto & tag = reader_r.nodeText();
180  if ( tag.c_str() && *tag.c_str() )
181  _keywords.insert( tag.asString() ); // remember keyword
182  return true;
183  }
184  }
185 
186  else if ( reader_r->nodeType() == XML_READER_TYPE_END_ELEMENT )
187  {
188  // xpath: /repomd/data
189  if ( reader_r->name() == "data" )
190  {
191  if ( _callback ) {
192  if ( not _discardDataEntry )
193  _callback( std::move(_location), _typeStr );
194  _discardDataEntry = false;
195  _location = OnMediaLocation();
196  _typeStr.clear();
197  }
198  return true;
199  }
200  }
201 
202  return true;
203  }
204 
205 
207  //
208  // CLASS NAME : RepomdFileReader
209  //
211 
212  RepomdFileReader::RepomdFileReader( const Pathname & repomd_file, ProcessResource callback )
213  : _pimpl( new Impl(repomd_file, std::move(callback)) )
214  {}
215 
217  : _pimpl( new Impl(repomd_file, ProcessResource()) )
218  {}
219 
221  {}
222 
223  const std::set<std::string> & RepomdFileReader::keywords() const
224  { return _pimpl->keywords(); }
225 
226  std::vector<std::pair<std::string,std::string>> RepomdFileReader::keyhints() const
227  {
228  std::vector<std::pair<std::string,std::string>> ret;
229  for ( const std::string & tag : keywords() ) {
230  // Get keyhints on the fly:
231  // gpg-pubkey-39db7c82-5847eb1f.asc?fpr=22C07BA534178CD02EFE22AAB88B2FD43DBDC284
232  // Fingerprint is explicitly mentioned or id/fpr can be derived from the filename
233  if ( tag.compare( 0,10,"gpg-pubkey" ) != 0 )
234  continue;
235 
236  // bsc#1267426: Do not accept a '/' in keyfile. It must not be a path.
237  static const str::regex rx( "^(gpg-pubkey([^/?]*))(\\?fpr=([[:xdigit:]]{8,}))?$" );
238  str::smatch what;
239  if ( str::regex_match( tag.c_str(), what, rx ) ) {
240  std::string keyfile { what[1] };
241  std::string keyident;
242  if ( what.size(4) != std::string::npos ) { // with fpr=
243  keyident = what[4];
244  }
245  else {
246  static const str::regex rx( /*gpg-pubkey*/"^-([[:xdigit:]]{8,})" );
247  if ( str::regex_match( what[2], what, rx ) ) {
248  keyident = what[1];
249  }
250  else {
251  DBG << "Tag " << tag << " does not contain a keyident. ignore it." << endl;
252  continue;
253  }
254  }
255  ret.push_back( std::make_pair( std::move(keyfile), std::move(keyident) ) );
256  }
257  }
258  return ret;
259  }
260 
261  } // ns yum
262  } // ns parser
263 } // ns zypp
264 
265 // vim: set ts=2 sts=2 sw=2 et ai:
#define MIL
Definition: Logger.h:103
unsigned size() const
Definition: Regex.cc:106
std::string _typeStr
The resource type string.
Describes a resource file located on a medium.
Regular expression.
Definition: Regex.h:94
Store and operate with byte count.
Definition: ByteCount.h:31
NodeType nodeType() const
Get the node type of the current node.
Definition: Node.h:126
Definition: ansi.h:854
static bool warning(const std::string &msg_r, const UserData &userData_r=UserData())
send warning text
Impl(const Pathname &repomd_file, ProcessResource &&callback)
Ctor taking a ProcessResource callback.
ByteCount getSize(Reader &reader_r)
Retrieve a size node.
OnMediaLocation _location
Location of metadata file.
XmlString getAttribute(const char *name_r) const
Provides a copy of the attribute value with the specified qualified name.
Definition: Node.h:71
std::vector< std::pair< std::string, std::string > > keyhints() const
gpg key hints shipped in keywords (bsc#1184326)
RW_pointer< Impl, rw_pointer::Scoped< Impl > > _pimpl
RepomdFileReader(const Pathname &repomd_file, ProcessResource callback)
CTOR.
#define pWAR
Definition: LogTools.h:316
std::string sconcat(Args &&... args)
Concat words as string.
Definition: LogTools.h:276
const std::set< std::string > & keywords() const
repo keywords parsed on the fly
Pathname _repomdFile
remember parsed filename
function< bool(OnMediaLocation &&, const std::string &)> ProcessResource
Callback taking OnMediaLocation and the resource type string.
const ProcessCredentials & _callback
Regular expression match result.
Definition: Regex.h:167
bool foreachNode(const ProcessNode &fnc_r)
Definition: Reader.h:144
XmlString nodeText()
If the current node is not empty, advances the reader to the next node, and returns the value...
Definition: Reader.cc:122
CheckSum getChecksum(Reader &reader_r)
Retrieve a checksum node.
std::string asString() const
Explicit conversion to std::string.
Definition: XmlString.h:77
XmlString name() const
The qualified name of the node, equal to Prefix :LocalName.
Definition: Node.h:118
bool regex_match(const char *s, smatch &matches, const regex &regex) ZYPP_API
Regular expression matching.
Definition: Regex.cc:80
std::set< std::string > _keywords
repo keywords parsed on the fly
Interface of repomd.xml file reader.
ProcessResource _callback
Function for processing collected data.
Easy-to use interface to the ZYPP dependency resolver.
Definition: CodePitfalls.doc:1
#define DBG
Definition: Logger.h:102
const std::set< std::string > & keywords() const
repo keywords parsed on the fly
boost::noncopyable NonCopyable
Ensure derived classes cannot be copied.
Definition: NonCopyable.h:26
xmlTextReader based interface to iterate xml streams.
Definition: Reader.h:95