libzypp  17.25.2
MetaLinkParser.cc
Go to the documentation of this file.
1 /*---------------------------------------------------------------------\
2 | ____ _ __ __ ___ |
3 | |__ / \ / / . \ . \ |
4 | / / \ V /| _/ _/ |
5 | / /__ | | | | | | |
6 | /_____||_| |_| |_| |
7 | |
8 \---------------------------------------------------------------------*/
14 #include <zypp/base/Logger.h>
15 #include <zypp/ByteArray.h>
16 #include <zypp/AutoDispose.h>
17 
18 #include <stack>
19 #include <vector>
20 #include <algorithm>
21 
22 #include <libxml2/libxml/SAX2.h>
23 
24 using namespace zypp::base;
25 
26 namespace zypp::media {
27  enum ParserState {
46  };
47 
48  struct transition {
49  std::string elementName; //< Name of the element for the transition to trigger
50  ParserState transitionTo; //< The state we go into when the element name in \a elementName is encountered
51  int docontent; //< Store the content of the element in the \a content member
52  };
53 
59  const std::unordered_map<ParserState, std::vector<transition> > & transitions () {
60  static std::unordered_map<ParserState, std::vector<transition> > map {
61  { STATE_START, {
62  { "metalink", STATE_METALINK, 0},
63  }
64  },
65  { STATE_METALINK, {
66  { "files", STATE_FILES, 0 },
67  { "file", STATE_M4FILE, 0 },
68  }
69  },
70  { STATE_FILES, {
71  { "file", STATE_FILE, 0},
72  }
73  },
74  { STATE_FILE, {
75  { "size", STATE_SIZE, 1 },
76  { "verification", STATE_VERIFICATION, 0 },
77  { "resources", STATE_RESOURCES, 0 },
78  }
79  },
81  { "hash", STATE_HASH, 1 },
82  { "pieces", STATE_PIECES, 0 },
83  }
84  },
85  { STATE_PIECES, {
86  { "hash", STATE_PHASH, 1 },
87  }
88  },
89  { STATE_RESOURCES, {
90  { "url", STATE_URL, 1 },
91  }
92  },
93  { STATE_M4FILE, {
94  { "size", STATE_M4SIZE, 1 },
95  { "hash", STATE_M4HASH, 1},
96  { "url", STATE_M4URL, 1},
97  { "pieces", STATE_M4PIECES, 0},
98  }
99  },
100  { STATE_M4PIECES, {
101  { "hash", STATE_M4PHASH, 1 },
102  }
103  },
104  };
105 
106  return map;
107  }
108 
109 static void XMLCALL startElement(void *userData, const xmlChar *name, const xmlChar **atts);
110 static void XMLCALL endElement(void *userData, const xmlChar *name);
111 static void XMLCALL characterData(void *userData, const xmlChar *s, int len);
112 
115  : parser( nullptr )
116  , state( STATE_START )
117  , depth( 0 )
118  , statedepth( 0 )
119  , docontent( 0 )
120  , gotfile( 0 )
121  , size( -1 )
122  , blksize( 0 )
123  , piecel( 0 )
124  , chksuml( 0 )
125  {
126  content.reserve( 256 );
127 
128  xmlSAXHandler sax;
129  memset(&sax, 0, sizeof(sax));
130  sax.startElement = startElement;
131  sax.endElement = endElement;
132  sax.characters = characterData;
133 
134  //internally creates a copy of xmlSaxHandler, so having it as local variable is save
135  parser = AutoDispose<xmlParserCtxtPtr>( xmlCreatePushParserCtxt(&sax, this, NULL, 0, NULL), xmlFreeParserCtxt );
136  }
137 
138  void doTransition ( const transition &t ) {
139  parentStates.push( state );
140  state = t.transitionTo;
141  docontent = t.docontent;
142  statedepth = depth;
143  content.clear();
144  }
145 
146  void popState () {
147  state = parentStates.top();
148  statedepth--;
149  parentStates.pop();
150 
151  }
152 
154 
155  ParserState state; //< current state as defined in \ref stateswitch
156  std::stack<ParserState> parentStates;
157 
158  int depth; //< current element depth of traversing the document elements
159 
166 
167  std::string content; //< content of the current element
168  int docontent; //< should the content of the current elem be parsed
169 
170  int gotfile;
171  off_t size;
172  std::vector<MetalinkMirror> urls;
173  size_t blksize;
174 
175  std::vector<ByteArray> piece;
176  int piecel;
177 
178  std::vector<ByteArray> sha1;
179  std::vector<ByteArray> zsync;
180 
182  int chksuml;
183 };
184 
189 static const char *
190 find_attr(const char *txt, const xmlChar **atts)
191 {
192  if(!atts) {
193  return nullptr;
194  }
195 
196  for (; *atts; atts += 2)
197  {
198  if (!strcmp(reinterpret_cast<const char*>(*atts), txt))
199  return reinterpret_cast<const char*>(atts[1]);
200  }
201  return nullptr;
202 }
203 
204 static void XMLCALL
205 startElement(void *userData, const xmlChar *name, const xmlChar **atts)
206 {
207  struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
208 
209  // if the current element depth does not match the expected depth for the current state we
210  // ignore the element and just increase the depth
211  if (pd->depth != pd->statedepth) {
212  pd->depth++;
213  return;
214  }
215  pd->depth++;
216 
217  const auto &trMap = transitions();
218  const auto currStateTrs = trMap.find( pd->state );
219  if ( currStateTrs == trMap.end() )
220  return;
221 
222  // check if the current element name is part of our transitions
223  auto foundTr = std::find_if( currStateTrs->second.begin(), currStateTrs->second.end(), [name]( const auto &tr ){
224  return tr.elementName == reinterpret_cast<const char *>(name);
225  });
226 
227  if ( foundTr == currStateTrs->second.end() ) {
228  // we found no possible transition, ignore
229  return;
230  }
231 
232  if ( ( foundTr->transitionTo == STATE_FILE || foundTr->transitionTo == STATE_M4FILE ) && pd->gotfile++)
233  return; /* ignore all but the first file */
234 
235  // advance the state machine and prepare variables for the new state
236  pd->doTransition( *foundTr );
237 
238  switch(pd->state)
239  {
240  case STATE_URL:
241  case STATE_M4URL:
242  {
243  const char *priority = find_attr("priority", atts);
244  const char *preference = find_attr("preference", atts);
245  const char *maxconnections = find_attr("maxconnections", atts);
246  int prio;
247  auto &mirr = pd->urls.emplace_back();
248  if (priority)
249  prio = str::strtonum<int>(priority);
250  else if (preference)
251  prio = 101 - str::strtonum<int>(preference);
252  else
253  prio = 999999;
254  mirr.priority = prio;
255 
256  if ( maxconnections )
257  mirr.maxConnections = str::strtonum<int>( maxconnections );
258 
259  break;
260  }
261  case STATE_PIECES:
262  case STATE_M4PIECES:
263  {
264  const char *type = find_attr("type", atts);
265  const char *length = find_attr("length", atts);
266  size_t blksize;
267 
268  if (!type || !length)
269  {
270  pd->popState();
271  break;
272  }
273  blksize = str::strtonum<unsigned long>(length);
274  if (!blksize || (pd->blksize && pd->blksize != blksize))
275  {
276  pd->popState();
277  break;
278  }
279  pd->blksize = blksize;
280  pd->piece.clear();
281  if (!strcmp(type, "sha1") || !strcmp(type, "sha-1"))
282  pd->piecel = 20;
283  else if (!strcmp(type, "zsync"))
284  pd->piecel = 4;
285  else
286  {
287  pd->popState();
288  break;
289  }
290  break;
291  }
292  case STATE_HASH:
293  case STATE_M4HASH:
294  {
295  const char *type = find_attr("type", atts);
296  if (!type)
297  type = "?";
298  if ((!strcmp(type, "sha1") || !strcmp(type, "sha-1")) && pd->chksuml < 20)
299  pd->chksuml = 20;
300  else if (!strcmp(type, "sha256") || !strcmp(type, "sha-256"))
301  pd->chksuml = 32;
302  else
303  {
304  pd->popState();
305  pd->docontent = 0;
306  }
307  break;
308  }
309  case STATE_PHASH:
310  case STATE_M4PHASH:
311  {
312  const char *piece = find_attr("piece", atts);
313  if ( pd->state == STATE_PHASH && (!piece || str::strtonum<uint>(piece) != pd->piece.size()) )
314  {
315  pd->popState();
316  }
317  break;
318  }
319  default:
320  break;
321  }
322 }
323 
/*!
 * Converts a string of hexadecimal digits into the bytes it encodes.
 *
 * Two digits form one byte, the first digit being the high nibble. Upper and
 * lower case digits are accepted.
 *
 * \param str the hex string
 * \returns the decoded bytes; an empty array if \a str is empty, has an odd
 *          number of characters or contains a character that is no hex digit
 */
ByteArray hexstr2bytes( std::string str )
{
  // value of a single hex digit, -1 for any other character
  const auto nibble = []( char c ) -> int {
    if ( c >= '0' && c <= '9' )
      return c - '0';
    if ( c >= 'a' && c <= 'f' )
      return c - 'a' + 10;
    if ( c >= 'A' && c <= 'F' )
      return c - 'A' + 10;
    return -1;
  };

  // a dangling nibble can not be decoded
  if ( str.length() % 2 != 0 )
    return {};

  ByteArray bytes;
  bytes.reserve( str.length() / 2 );
  for ( std::string::size_type i = 0; i < str.length(); i += 2 )
  {
    const int hi = nibble( str[i] );
    const int lo = nibble( str[i+1] );
    if ( hi < 0 || lo < 0 )
      return {};
    bytes.push_back( static_cast<unsigned char>( ( hi << 4 ) | lo ) );
  }
  return bytes;
}
345 
346 static void XMLCALL
347 endElement(void *userData, const xmlChar *)
348 {
349  struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
350  //printf("end depth %d-%d name %s\n", pd->depth, pd->statedepth, name);
351  if (pd->depth != pd->statedepth)
352  {
353  pd->depth--;
354  return;
355  }
356  switch (pd->state)
357  {
358  case STATE_SIZE:
359  case STATE_M4SIZE:
360  pd->size = (off_t)str::strtonum<off_t>(pd->content); //strtoull(pd->content, 0, 10);
361  break;
362  case STATE_HASH:
363  case STATE_M4HASH:
364  pd->chksum.clear();
365  pd->chksum = hexstr2bytes( pd->content );
366  if ( pd->content.length() != size_t(pd->chksuml) * 2 || !pd->chksum.size() )
367  {
368  pd->chksum.clear();
369  pd->chksuml = 0;
370  }
371  break;
372  case STATE_PHASH:
373  case STATE_M4PHASH: {
374  if ( pd->content.length() != size_t(pd->piecel) * 2 )
375  break;
376  ByteArray pieceHash = hexstr2bytes( pd->content );
377  if ( !pieceHash.size() )
378  pieceHash.resize( pd->piecel, 0 );
379  pd->piece.push_back( pieceHash );
380  break;
381  }
382  case STATE_PIECES:
383  case STATE_M4PIECES:
384  if (pd->piecel == 4)
385  pd->zsync = pd->piece;
386  else
387  pd->sha1 = pd->piece;
388 
389  pd->piecel = 0;
390  pd->piece.clear();
391  break;
392  case STATE_URL:
393  case STATE_M4URL:
394  if ( pd->content.length() )
395  pd->urls.back().url = std::string(pd->content);
396  else
397  // without a actual URL the mirror is useless
398  pd->urls.pop_back();
399  break;
400  default:
401  break;
402  }
403 
404  pd->depth--;
405  pd->popState();
406  pd->docontent = 0;
407 }
408 
409 static void XMLCALL
410 characterData(void *userData, const xmlChar *s, int len)
411 {
412  struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
413  if (!pd->docontent)
414  return;
415 
416  if ( pd->content.length() + len + 1 > pd->content.capacity() )
417  pd->content.reserve( pd->content.capacity() + 256 );
418  pd->content.append( s, s+len );
419 }
420 
421 
422 MetaLinkParser::MetaLinkParser()
423  : pd( new ml_parsedata )
424 {}
425 
427 {
428  delete pd;
429 }
430 
431 void
433 {
434  parse(InputStream(filename));
435 }
436 
437 void
439 {
440  char buf[4096];
441  if (!is.stream())
442  ZYPP_THROW(Exception("MetaLinkParser: no such file"));
443  while (is.stream().good())
444  {
445  is.stream().read(buf, sizeof(buf));
446  parseBytes(buf, is.stream().gcount());
447  }
448  parseEnd();
449 }
450 
451 void
452 MetaLinkParser::parseBytes(const char *buf, size_t len)
453 {
454  if (!len)
455  return;
456 
457  if (xmlParseChunk(pd->parser, buf, len, 0)) {
458  ZYPP_THROW(Exception("Parse Error"));
459  }
460 }
461 
462 void
464 {
465  if (xmlParseChunk(pd->parser, NULL, 0, 1)) {
466  ZYPP_THROW(Exception("Parse Error"));
467  }
468  if (pd->urls.size() ) {
469  stable_sort(pd->urls.begin(), pd->urls.end(), []( const auto &a, const auto &b ){
470  return a.priority < b.priority;
471  });
472  }
473 }
474 
475 std::vector<Url>
477 {
478  std::vector<Url> urls;
479  for ( const auto &mirr : pd->urls )
480  urls.push_back( mirr.url );
481  return urls;
482 }
483 
484 const std::vector<MetalinkMirror> &MetaLinkParser::getMirrors() const
485 {
486  return pd->urls;
487 }
488 
490 {
491  MediaBlockList bl(pd->size);
492  if (pd->chksuml == 20)
493  bl.setFileChecksum("SHA1", pd->chksuml, pd->chksum.data() );
494  else if (pd->chksuml == 32)
495  bl.setFileChecksum("SHA256", pd->chksuml, pd->chksum.data());
496  if (pd->size != off_t(-1) && pd->blksize)
497  {
498  size_t nb = (pd->size + pd->blksize - 1) / pd->blksize;
499  off_t off = 0;
500  size_t size = pd->blksize;
501  for ( size_t i = 0; i < nb; i++ )
502  {
503  if (i == nb - 1)
504  {
505  size = pd->size % pd->blksize;
506  if (!size)
507  size = pd->blksize;
508  }
509  size_t blkno = bl.addBlock(off, size);
510  if ( i < pd->sha1.size())
511  {
512  bl.setChecksum(blkno, "SHA1", 20, pd->sha1[i].data());
513  if ( i < pd->zsync.size())
514  {
515  unsigned char *p = pd->zsync[i].data();
516  bl.setRsum(blkno, 4, p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24, pd->blksize);
517  }
518  }
519  off += pd->blksize;
520  }
521  }
522  return bl;
523 }
524 
525 const std::vector<ByteArray> &MetaLinkParser::getZsyncBlockHashes() const
526 {
527  return pd->zsync;
528 }
529 
530 const std::vector<ByteArray> &MetaLinkParser::getSHA1BlockHashes() const
531 {
532  return pd->sha1;
533 }
534 
535 } // namespace zypp::media
size_t addBlock(off_t off, size_t size)
add a block with offset off and size size to the block list.
MediaBlockList getBlockList() const
return the block list from the parsed metalink data
static void XMLCALL characterData(void *userData, const xmlChar *s, int len)
#define ZYPP_THROW(EXCPT)
Drops a logline and throws the Exception.
Definition: Exception.h:392
ByteArray hexstr2bytes(std::string str)
static void XMLCALL endElement(void *userData, const xmlChar *name)
void parseBytes(const char *bytes, size_t len)
parse a chunk of a file consisting of metalink xml data.
String related utilities and Regular expression matching.
Helper to create and pass std::istream.
Definition: InputStream.h:56
void parse(const Pathname &filename)
parse a file consisting of metalink xml data
const std::unordered_map< ParserState, std::vector< transition > > & transitions()
void parseEnd()
tells the parser that all chunks are now processed
boost::noncopyable NonCopyable
Ensure derived classes cannot be copied.
Definition: NonCopyable.h:26
struct ml_parsedata * pd
void setRsum(size_t blkno, int rsl, unsigned int rs, size_t rspad=0)
set / verify the (weak) rolling checksum over a single block
const std::vector< MetalinkMirror > & getMirrors() const
return the mirrors from the parsed metalink data
#define nullptr
Definition: Easy.h:55
AutoDispose< xmlParserCtxtPtr > parser
std::vector< ByteArray > sha1
SolvableIdType size_type
Definition: PoolMember.h:126
static void XMLCALL startElement(void *userData, const xmlChar *name, const xmlChar **atts)
Base class for Exception.
Definition: Exception.h:145
static const char * find_attr(const char *txt, const xmlChar **atts)
Look up a xml attribute in the passed array atts.
std::istream & stream() const
The std::istream.
Definition: InputStream.h:93
std::stack< ParserState > parentStates
std::vector< ByteArray > zsync
std::vector< MetalinkMirror > urls
std::vector< ByteArray > piece
std::vector< Url > getUrls() const
return the download urls from the parsed metalink data
void setFileChecksum(std::string ctype, int cl, unsigned char *c)
set / verify the checksum over the whole file
void setChecksum(size_t blkno, std::string cstype, int csl, unsigned char *cs, size_t cspad=0)
set / verify the (strong) checksum over a single block
void doTransition(const transition &t)
#define c2h(c)
const std::vector< ByteArray > & getSHA1BlockHashes() const
const std::vector< ByteArray > & getZsyncBlockHashes() const
std::vector< unsigned char > ByteArray
Definition: ByteArray.h:14