diff options
-rw-r--r-- | import.all.php | 16 | ||||
-rw-r--r-- | import.arches.php | 8 | ||||
-rw-r--r-- | import.bugzilla.php | 21 | ||||
-rw-r--r-- | import.categories.php | 18 | ||||
-rw-r--r-- | import.ebuild_arch.php | 56 | ||||
-rw-r--r-- | import.ebuild_homepage.php | 43 | ||||
-rw-r--r-- | import.ebuild_license.php | 48 | ||||
-rw-r--r-- | import.ebuild_mask.php | 7 | ||||
-rw-r--r-- | import.ebuild_metadata.php | 28 | ||||
-rw-r--r-- | import.ebuilds.php | 28 | ||||
-rw-r--r-- | import.eclasses.php | 7 | ||||
-rw-r--r-- | import.herds.php | 7 | ||||
-rw-r--r-- | import.licenses.php | 9 | ||||
-rw-r--r-- | import.package_mask.php | 7 | ||||
-rw-r--r-- | import.packages.php | 37 | ||||
-rw-r--r-- | import.use_expand.php | 9 | ||||
-rw-r--r-- | import.use_flags.php | 9 | ||||
-rw-r--r-- | import.use_local.php | 9 |
18 files changed, 252 insertions, 115 deletions
diff --git a/import.all.php b/import.all.php index aaa4b27..03bb13d 100644 --- a/import.all.php +++ b/import.all.php @@ -2,20 +2,31 @@ require_once 'header.php'; + // This gets used everywhere, might as well create it here + // and check for it later. + require_once 'class.portage.tree.php'; + $tree = new PortageTree(); + + // Had it choke out on me when starting from scratch ini_set('memory_limit', -1); + // Always verbose since we are generally running manually from CLI $verbose = true; + // Run with -cron arg to go quietly into the night. + // No code in here to do emerge --sync. Runs separately. if(in_array("-cron", $argv)) { $verbose = false; $cron = true; } + // Log the import times of the scripts if($cron) { $sql = "INSERT INTO znurt (action) VALUES ('start_import');"; $db->query($sql); } + // FIXME these could be accidentally overwritten in one of the includes $base = true; $packages = true; $ebuilds = true; @@ -23,14 +34,15 @@ $use = true; $final = true; + // Thankfully, I've never really had these break down much, never + // had much use for the grouping. $arr_import['base'] = array('arches', 'eclasses', 'herds', 'licenses'); $arr_import['packages'] = array('categories', 'packages', 'bugzilla'); $arr_import['ebuilds'] = array('ebuilds'); $arr_import['metadata'] = array('ebuild_metadata', 'ebuild_arch', 'ebuild_homepage', 'ebuild_license', 'package_mask', 'ebuild_mask', 'ebuild_ev', 'use_global', 'use_local', 'use_expand', 'ebuild_use', 'ebuild_depend'); - $arr_import['final'] = array('final'); - + // FIXME updating the website with our import status would be nice. foreach($arr_import as $key => $arr) { if($$key) { foreach($arr as $file) { diff --git a/import.arches.php b/import.arches.php index ff4ce72..efa09a9 100644 --- a/import.arches.php +++ b/import.arches.php @@ -1,10 +1,14 @@ <? require_once 'header.php'; - require_once 'class.portage.tree.php'; - $tree = new PortageTree(); + if(!$tree) { + require_once 'class.portage.tree.php'; + $tree = new PortageTree(); + } + // FIXME This is really dumb, just grab all the arches + // since I look at all of them now. $arr_arches = $tree->getArches(); $arr_arches = array_merge($arr_arches, $tree->getArches(true)); diff --git a/import.bugzilla.php b/import.bugzilla.php index 3469fa2..0688ad7 100644 --- a/import.bugzilla.php +++ b/import.bugzilla.php @@ -1,11 +1,19 @@ <? + /** + * The bugzilla script grabs a CSV export from Gentoo's bugzilla and inserts them into the database. + * + * The CSV export seems to be non-negotiable in asking for columns, so you're stuck with what you get. + * + * Only executes once a day. Goes rather fast, for the most part. Could probably use some curl error + * checking in case the site is down, unreachable or slow to respond. + */ + require_once 'header.php'; - require_once 'class.portage.tree.php'; - - $tree = new PortageTree(); - -// $verbose = true; + if(!$tree) { + require_once 'class.portage.tree.php'; + $tree = new PortageTree(); + } $import_bugzilla = false; @@ -37,6 +45,8 @@ $url = "http://bugs.gentoo.org/buglist.cgi?bug_file_loc=&bug_file_loc_type=allwordssubstr&bug_id=&bug_status=UNCONFIRMED&bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&bugidtype=include&chfieldfrom=&chfieldto=Now&chfieldvalue=&email1=&email2=&emailtype1=exact&emailtype2=substring&field-1-0-0=product&field-1-1-0=bug_status&field-1-2-0=short_desc&field0-0-0=assigned_to&field0-1-0=assigned_to&field0-2-0=short_desc&keywords=&keywords_type=allwords&long_desc=&long_desc_type=allwordssubstr&product=Gentoo%20Linux&query_format=advanced&remaction=&short_desc=$category_name%2F&short_desc_type=allwordssubstr&status_whiteboard=&status_whiteboard_type=allwordssubstr&type-1-0-0=anyexact&type-1-1-0=anyexact&type-1-2-0=allwordssubstr&type0-0-0=notequals&type0-1-0=notequals&type0-2-0=notsubstring&value-1-0-0=Gentoo%20Linux&value-1-1-0=UNCONFIRMED%2CNEW%2CASSIGNED%2CREOPENED&value-1-2-0=media-video%2F&value0-0-0=maintainer-wanted%40gentoo.org&value0-1-0=maintainer-needed%40gentoo.org&value0-2-0=new%20package&votes=&ctype=csv"; $ch = curl_init($url); + // FIXME write to a *new* temporary file + // FIXME Make sure the CSV is non-empty $fp = fopen("/tmp/bugs.csv", "w"); curl_setopt($ch, CURLOPT_FILE, $fp); @@ -52,6 +62,7 @@ } // FIXME this query takes a while to run + // FIXME Have this run after each category is imported, and query for that $cp. That'll go much faster. $sql = "INSERT INTO package_bugs (bug, package, description, status) SELECT b.bug_id AS bug, p.id AS package, b.short_short_desc, 1 FROM package p INNER JOIN category c ON p.category = c.id INNER JOIN bugzilla b ON b.short_short_desc LIKE ('%' || c.name || '/' || p.name || '%');"; $db->query($sql); diff --git a/import.categories.php b/import.categories.php index 201cb41..0bec245 100644 --- a/import.categories.php +++ b/import.categories.php @@ -1,11 +1,15 @@ <? require_once 'header.php'; - require_once 'class.portage.tree.php'; + + if(!$tree) { + require_once 'class.portage.tree.php'; + $tree = new PortageTree(); + } + require_once 'class.portage.category.php'; require_once 'class.db.category.php'; - $tree = new PortageTree(); $table = 'category'; $arr = $tree->getCategories(); @@ -27,6 +31,10 @@ } } + // FIXME I should check the mtimes of the directories + // instead, just to get an idea of when things were + // last changed. Also, store the metadata mtime. + // Update descriptions $sql = "SELECT name, id FROM category;"; $arr = $db->getAssoc($sql); @@ -34,10 +42,10 @@ foreach($arr as $category_name => $category) { $db_category = new DBCategory($category); - $obj_category = new PortageCategory($category_name); + $c = new PortageCategory($category_name); - if($db_category->description != $obj_category->description['en']) - $db_category->description = $obj_category->description['en']; + if($db_category->description != $c->description['en']) + $db_category->description = $c->description['en']; } diff --git a/import.ebuild_arch.php b/import.ebuild_arch.php index 957c19f..ab9e77b 100644 --- a/import.ebuild_arch.php +++ b/import.ebuild_arch.php @@ -1,11 +1,16 @@ <? require_once 'header.php'; - require_once 'class.portage.tree.php'; + + if(!$tree) { + require_once 'class.portage.tree.php'; + $tree = new PortageTree(); + } + require_once 'class.portage.category.php'; require_once 'class.portage.package.php'; require_once 'class.portage.ebuild.php'; - + // $verbose = true; // $qa = true; @@ -14,40 +19,41 @@ $arr_arches = $tree->getArches(); // Find all the ebuilds that are missing ebuild arch - $sql = "SELECT id, metadata FROM missing_arch;"; - $arr = $db->getAssoc($sql); + $sql = "SELECT ebuild, metadata FROM missing_arch;"; + $arr_missing_arch = $db->getAssoc($sql); if($verbose) - shell::msg(count($arr)." ebuilds to check"); + shell::msg(count($arr_missing_arch)." ebuilds to check"); // Get the arches from the database $db_arches = $db->getAssoc("SELECT name, id FROM arch;"); //FIXME rewrite this entire thing in SQL - foreach($arr as $ebuild_id => $keywords) { - - if(!empty($keywords)) - $arr = arrKeywords($keywords, $arr_arches); - else { - $arr = array(); - } - - if(count($arr)) { - foreach($arr as $arch => $status) { + if(count($arr)) { + foreach($arr_missing_arch as $ebuild => $keywords) { - if($db_arches[$arch]) { - $arr_insert = array( - 'ebuild' => $ebuild_id, - 'arch' => $db_arches[$arch], - 'status' => $status, - ); - - $db->autoExecute('ebuild_arch', $arr_insert, MDB2_AUTOQUERY_INSERT); + if(!empty($keywords)) + $arr = arrKeywords($keywords, $arr_arches); + else { + $arr = array(); + } + + // Status in this case is the keyword, not the import status + if(count($arr)) { + foreach($arr as $arch => $status) { + + if($db_arches[$arch]) { + $arr_insert = array( + 'ebuild' => $ebuild, + 'arch' => $db_arches[$arch], + 'status' => $status, + ); + + $db->autoExecute('ebuild_arch', $arr_insert, MDB2_AUTOQUERY_INSERT); + } } } } - - } /** diff --git a/import.ebuild_homepage.php b/import.ebuild_homepage.php index 57250f1..85a64c1 100644 --- a/import.ebuild_homepage.php +++ b/import.ebuild_homepage.php @@ -1,39 +1,42 @@ <? require_once 'header.php'; - require_once 'class.portage.tree.php'; + + if(!$tree) { + require_once 'class.portage.tree.php'; + $tree = new PortageTree(); + } + require_once 'class.portage.category.php'; require_once 'class.portage.package.php'; require_once 'class.portage.ebuild.php'; // Find all the ebuilds that are missing ebuild arch $sql = "SELECT id, metadata FROM missing_homepage;"; - $arr = $db->getAssoc($sql); + $arr_missing_homepage = $db->getAssoc($sql); if($verbose) shell::msg(count($arr)." ebuilds to check"); - foreach($arr as $ebuild_id => $homepages) { - - if(!empty($homepages)) - $arr = arrHomepages($homepages); - else { - $arr = array(); - } - - if(count($arr)) { - foreach($arr as $homepage) { + if(count($arr_missing_homepage)) { + foreach($arr_missing_homepage as $ebuild => $str) { + + if(!empty($str)) { + $arr = arrHomepages($str); - $arr_insert = array( - 'ebuild' => $ebuild_id, - 'homepage' => $homepage, - ); - - $db->autoExecute('ebuild_homepage', $arr_insert, MDB2_AUTOQUERY_INSERT); + if(count($arr)) { + foreach($arr as $url) { + + $arr_insert = array( + 'ebuild' => $ebuild, + 'homepage' => $url, + ); + + $db->autoExecute('ebuild_homepage', $arr_insert, MDB2_AUTOQUERY_INSERT); + } + } } } - - } /** diff --git a/import.ebuild_license.php b/import.ebuild_license.php index f62868f..22c7b6c 100644 --- a/import.ebuild_license.php +++ b/import.ebuild_license.php @@ -1,18 +1,22 @@ <? require_once 'header.php'; - require_once 'class.portage.tree.php'; + + if(!$tree) { + require_once 'class.portage.tree.php'; + $tree = new PortageTree(); + } + require_once 'class.portage.category.php'; require_once 'class.portage.package.php'; require_once 'class.portage.ebuild.php'; // Get the arches - $tree = new PortageTree(); $arr_licenses = $tree->getLicenses(); // Find all the ebuilds that are missing ebuild arch - $sql = "SELECT id, metadata FROM missing_license;"; - $arr = $db->getAssoc($sql); + $sql = "SELECT ebuild, metadata FROM missing_license;"; + $arr_missing_license = $db->getAssoc($sql); if($verbose) shell::msg(count($arr)." ebuilds to check"); @@ -20,28 +24,26 @@ // Get the licenses from the database $db_licenses = $db->getAssoc("SELECT name, id FROM license;"); - foreach($arr as $ebuild_id => $str) { - - if(!empty($str)) { - $arr = arrLicenses($str, $arr_licenses); - } else { - $arr = array(); - } - - if(count($arr)) { - foreach($arr as $str) { - if($db_licenses[$str]) { - $arr_insert = array( - 'ebuild' => $ebuild_id, - 'license' => $db_licenses[$str], - ); - - $db->autoExecute('ebuild_license', $arr_insert, MDB2_AUTOQUERY_INSERT); + if(count($arr_missing_license)) { + foreach($arr_missing_license as $ebuild => $str) { + + if(!empty($str)) { + $arr = arrLicenses($str, $arr_licenses); + + if(count($arr)) { + foreach($arr as $str) { + if($db_licenses[$str]) { + $arr_insert = array( + 'ebuild' => $ebuild, + 'license' => $db_licenses[$str], + ); + + $db->autoExecute('ebuild_license', $arr_insert, MDB2_AUTOQUERY_INSERT); + } + } } } } - - } /** diff --git a/import.ebuild_mask.php b/import.ebuild_mask.php index 78a27e5..1fda64f 100644 --- a/import.ebuild_mask.php +++ b/import.ebuild_mask.php @@ -10,7 +10,12 @@ require_once 'header.php'; require_once 'import.functions.php'; - require_once 'class.portage.tree.php'; + + if(!$tree) { + require_once 'class.portage.tree.php'; + $tree = new PortageTree(); + } + require_once 'class.portage.category.php'; require_once 'class.portage.package.php'; require_once 'class.portage.ebuild.php'; diff --git a/import.ebuild_metadata.php b/import.ebuild_metadata.php index 2ccb6b7..1a39fee 100644 --- a/import.ebuild_metadata.php +++ b/import.ebuild_metadata.php @@ -5,28 +5,40 @@ // $debug = true; + /** + * It may seem a little odd, and to break normalization, to have a query to set the description on the package + * table when it can be queried from the ebuilds. The fact is this is just one of many shortcuts taken, since + * the site is a snapshot, and information like that is not required in realtime. Not to mention it makes + * life a whole lot easier. + */ + require_once 'header.php'; - require_once 'class.portage.tree.php'; + + if(!$tree) { + require_once 'class.portage.tree.php'; + $tree = new PortageTree(); + } + require_once 'class.portage.category.php'; require_once 'class.portage.package.php'; require_once 'class.portage.ebuild.php'; // Find all the ebuilds that are missing ebuild arch $sql = "SELECT * FROM missing_metadata;"; - $arr = $db->getAssoc($sql); + $arr = $db->getAll($sql); if($verbose) shell::msg(number_format(count($arr))." ebuilds to check"); - foreach($arr as $ebuild_id => $row) { + foreach($arr as $row) { extract($row); - $obj_ebuild = new PortageEbuild("$category/$pf"); + $e = new PortageEbuild("$category_name/$pf"); if($debug) - shell::msg("[$category/".$obj_ebuild->pn."]"); + shell::msg("[$category_name/".$e->pn."]"); - $arr_metadata = $obj_ebuild->metadata(); + $arr_metadata = $e->metadata(); if(count($arr_metadata)) { @@ -34,7 +46,7 @@ if(!empty($value)) { $arr_insert = array( - 'ebuild' => $ebuild_id, + 'ebuild' => $ebuild, 'keyword' => $keyword, 'value' => $value, ); @@ -44,7 +56,7 @@ } } else { if($verbose || $qa) - shell::msg("[QA] No metadata: $category/".$obj_ebuild->pf); + shell::msg("[QA] No metadata: $category_name/".$e->pf); } } diff --git a/import.ebuilds.php b/import.ebuilds.php index 450ff27..0eae3a5 100644 --- a/import.ebuilds.php +++ b/import.ebuilds.php @@ -20,17 +20,36 @@ * For now, you're going to have to manually flip some bits to get it to correct mistakes * like that. It's too much of a pain to have it check for it (at this point). */ + + /** + * This script is similar to the package one, in that it will create a temporary file + * and set the mtime to the last package, and then look for any new changes. Makes the + * find utility do all the heavy lifting, and is much simpler. + * + * Also, this updates the DB with the mtime of both the actual ebuild and the cache file. + * It seems like they are usually the same mtime, though. + * + * While it may seem odd, any time an ebuild is "changed" (as in, the mtime is different), it is + * actually re-inserted as a new ebuild all over again. It would be too much work to go + * through all the scripts and compare differences between old and new data; it is far easier to + * simply re-import the data as if it was newly created. New ebuilds are flagged with a status of 1 + * and should be ignored by the website. Unchanged ebuilds are flagged with a status of 0 and ones + * marked for removal with a 2. The website should always pull the ones where the status is 0 or 2. + * + */ // $verbose = true; // $debug = false; // $all = false; - if($debug) { - $verbose = true; - } require_once 'header.php'; - require_once 'class.portage.tree.php'; + + if(!$tree) { + require_once 'class.portage.tree.php'; + $tree = new PortageTree(); + } + require_once 'class.portage.category.php'; require_once 'class.portage.package.php'; require_once 'class.portage.ebuild.php'; @@ -62,7 +81,6 @@ } } - $tree = new PortageTree(); $categories = $tree->getCategories(); $arr_import = array(); diff --git a/import.eclasses.php b/import.eclasses.php index b7fd142..2c2912d 100644 --- a/import.eclasses.php +++ b/import.eclasses.php @@ -1,9 +1,12 @@ <? require_once 'header.php'; - require_once 'class.portage.tree.php'; - $tree = new PortageTree(); + if(!$tree) { + require_once 'class.portage.tree.php'; + $tree = new PortageTree(); + } + $table = 'eclass'; $arr = $tree->getEclasses(); diff --git a/import.herds.php b/import.herds.php index 9f06f2c..dc70ea2 100644 --- a/import.herds.php +++ b/import.herds.php @@ -1,9 +1,12 @@ <? require_once 'header.php'; - require_once 'class.portage.tree.php'; - $tree = new PortageTree(); + if(!$tree) { + require_once 'class.portage.tree.php'; + $tree = new PortageTree(); + } + $table = 'herd'; $arr = $tree->getHerds(); diff --git a/import.licenses.php b/import.licenses.php index 1d10d6e..0ce22fb 100644 --- a/import.licenses.php +++ b/import.licenses.php @@ -1,9 +1,12 @@ <? require_once 'header.php'; - require_once 'class.portage.tree.php'; - - $tree = new PortageTree(); + + if(!$tree) { + require_once 'class.portage.tree.php'; + $tree = new PortageTree(); + } + $table = 'license'; $arr = $tree->getLicenses(); diff --git a/import.package_mask.php b/import.package_mask.php index cdd4d73..283f64a 100644 --- a/import.package_mask.php +++ b/import.package_mask.php @@ -17,7 +17,12 @@ */ require_once 'header.php'; - require_once 'class.portage.tree.php'; + + if(!$tree) { + require_once 'class.portage.tree.php'; + $tree = new PortageTree(); + } + require_once 'class.portage.category.php'; require_once 'class.portage.package.php'; require_once 'class.portage.ebuild.php'; diff --git a/import.packages.php b/import.packages.php index de8db86..8bc0ce2 100644 --- a/import.packages.php +++ b/import.packages.php @@ -3,8 +3,40 @@ // $verbose = true; // $debug = false; + /** + * This script creates a temporary file in /tmp named znurt[foo] that sets itself + * to the mtime of the latest package mtime in the database. This way, I can simply + * use find to do all the heavy lifting to quickly locate any package directories + * that were modified since last import. + * + * Directory names in portage tend to get their mtimes updated on a regular basis; + * generally speaking, I'd say that about 50% of them change each sync, though + * I can't pin down why. Packages that haven't been touched in ages get their + * directory modified for no reason I can see. + * + * As a result, the mtime of a package is notoriously unreliable as a reference + * for anything. However, if it does change, it *can* indicate that an ebuild or + * file was removed, so, with all due diligence, we will check those later to see + * if something was actually taken away, and update the database. + */ + + /** + * This is the first file where tables start to have a status column. There is + * only three status levels: 0 - completely imported, and "live", 1 - being updated, + * or newly inserted and 2 - flagged to be removed. + * + * The package table only uses 0 and 1. The website should ignore the status, since + * the changes are only to notify the other scripts that something has changed, and to + * look more closely at the files related to the package. + */ + require_once 'header.php'; - require_once 'class.portage.tree.php'; + + if(!$tree) { + require_once 'class.portage.tree.php'; + $tree = new PortageTree(); + } + require_once 'class.portage.category.php'; require_once 'class.portage.package.php'; require_once 'class.db.package.php'; @@ -28,7 +60,6 @@ if(!$all) { - $tree = new PortageTree(); $categories = $tree->getCategories(); $tmp = tempnam('/tmp', 'znurt'); @@ -76,6 +107,8 @@ $arr_diff = importDiff('package', $arr_packages, "category = $category_id"); // FIXME Flag to be deleted, execute later + // This is dangerous to delete right now because 1) it will take a *long* time, and + // 2) you're breaking the whole "snapshot" approach. if(count($arr_diff['delete'])) { foreach($arr_diff['delete'] as $package_name) { $sql = "DELETE FROM $table WHERE name = ".$db->quote($package_name)." AND category = $category_id;"; diff --git a/import.use_expand.php b/import.use_expand.php index 8529abf..7fc18d6 100644 --- a/import.use_expand.php +++ b/import.use_expand.php @@ -1,7 +1,12 @@ <? require_once 'header.php'; - require_once 'class.portage.tree.php'; + + if(!$tree) { + require_once 'class.portage.tree.php'; + $tree = new PortageTree(); + } + require_once 'class.portage.use_flag.php'; require_once 'class.portage.ebuild.php'; require_once 'class.portage.atom.php'; @@ -9,8 +14,6 @@ require_once 'class.db.use.php'; require_once 'File/Find.php'; - $tree = new PortageTree(); - // Expand use flags $type = 'expand'; $arr_new[$type] = $arr_delete[$type] = $arr_use_flags[$type] = array(); diff --git a/import.use_flags.php b/import.use_flags.php index 1b42e25..bd626f2 100644 --- a/import.use_flags.php +++ b/import.use_flags.php @@ -1,7 +1,12 @@ <? require_once 'header.php'; - require_once 'class.portage.tree.php'; + + if(!$tree) { + require_once 'class.portage.tree.php'; + $tree = new PortageTree(); + } + require_once 'class.portage.use_flag.php'; require_once 'class.portage.ebuild.php'; require_once 'class.portage.atom.php'; @@ -9,8 +14,6 @@ require_once '/home/steve/svn/znurt/class.db.use.php'; require_once 'File/Find.php'; - $tree = new PortageTree(); - // Local use flags $type = 'local'; $u = new PortageUseFlag($type); diff --git a/import.use_local.php b/import.use_local.php index 5b7f6ca..00f9f77 100644 --- a/import.use_local.php +++ b/import.use_local.php @@ -1,7 +1,12 @@ <? require_once 'header.php'; - require_once 'class.portage.tree.php'; + + if(!$tree) { + require_once 'class.portage.tree.php'; + $tree = new PortageTree(); + } + require_once 'class.portage.use_flag.php'; require_once 'class.portage.ebuild.php'; require_once 'class.portage.atom.php'; @@ -10,8 +15,6 @@ require_once 'class.db.package_use.php'; require_once 'File/Find.php'; - $tree = new PortageTree(); - // Local use flags $type = 'local'; $u = new PortageUseFlag($type); |